{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2CIW7WAAOYMSWMBIKFILOJ2CZ7","short_pith_number":"pith:2CIW7WAA","schema_version":"1.0","canonical_sha256":"d0916fd80076192b30285150b72742cffbc3e7971a6bcf37918ce1ca6e98f47c","source":{"kind":"arxiv","id":"2605.13586","version":1},"attestation_state":"computed","paper":{"title":"HetScene: Heterogeneity-Aware Diffusion for Dense Indoor Scene Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Decomposing objects into primary structural roles and secondary contextual roles enables a two-stage diffusion process that generates coherent dense indoor scenes.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Cheng Peng, Chi Wang, Jiamin Xu, Junming Huang, Rong Zhang, Weiwei Xu, Zini Chen","submitted_at":"2026-05-13T14:21:51Z","abstract_excerpt":"Generating controllable and physically plausible indoor scenes is a pivotal prerequisite for constructing high-fidelity simulation environments for embodied AI. However, existing deeplearning-based methods usually treat all objects as homogeneous instances within a unified generation process. While effective for sparse and simplistic layouts, they struggle to model realistic layouts with dense object arrangements and complex spatial dependencies, leadingto limited scalability and degraded physical plausibility. To deal with these challenges, we revisit indoor layout generation from the perspec"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.13586","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T14:21:51Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d83b15e5ac96af553e76084ea1db957bfbd3b6e8a6739f8f84f5b98677d900d3","abstract_canon_sha256":"3d50f66816e3efce50e1ab4392e91dfd84cf1b45bb9e00f62252dd2cd580bbcc"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:44:23.156466Z","signature_b64":"rWXh9UD/nmIh2jjUSIBfYsK8aV8MCB3CRFfqjHJVeTRA8OK54huLMUvNO8bMJw8yg5bQvBf1ekoDjE6RFqL1BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d0916fd80076192b30285150b72742cffbc3e7971a6bcf37918ce1ca6e98f47c","last_reissued_at":"2026-05-18T02:44:23.155999Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:44:23.155999Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"HetScene: Heterogeneity-Aware Diffusion for Dense Indoor Scene Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Decomposing objects into primary structural roles and secondary contextual roles enables a two-stage diffusion process that generates coherent dense indoor scenes.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Cheng Peng, Chi Wang, Jiamin Xu, Junming Huang, Rong Zhang, Weiwei Xu, Zini Chen","submitted_at":"2026-05-13T14:21:51Z","abstract_excerpt":"Generating controllable and physically plausible indoor scenes is a pivotal prerequisite for constructing high-fidelity simulation environments for embodied AI. However, existing deeplearning-based methods usually treat all objects as homogeneous instances within a unified generation process. While effective for sparse and simplistic layouts, they struggle to model realistic layouts with dense object arrangements and complex spatial dependencies, leadingto limited scalability and degraded physical plausibility. To deal with these challenges, we revisit indoor layout generation from the perspec"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"we propose HetScene, a heterogeneous two-stage generation framework that decouples indoor layout synthesis into Structural Layout Generation (SLG) and Contextual Layout Generation (CLG). SLG first generates globally coherent structural layouts with only primary objects conditioned on text descriptions, top-down binary room masks, and spatial relation graphs.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That objects can be reliably decomposed into primary and secondary categories based on distinct roles in shaping a scene, and that this decomposition will resolve issues with dense arrangements and complex spatial dependencies.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"HetScene proposes a two-stage heterogeneous diffusion framework that decomposes scenes into primary structural objects and secondary contextual objects to generate denser, more plausible indoor layouts.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Decomposing objects into primary structural roles and secondary contextual roles enables a two-stage diffusion process that generates coherent dense indoor scenes.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4b2ac3f85881889fc3eac2410b8d04ed9955db1d47b22ebf9643439e8b746848"},"source":{"id":"2605.13586","kind":"arxiv","version":1},"verdict":{"id":"eb8f8e42-02fd-42d6-94e1-c144ddaf0913","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T19:31:52.629862Z","strongest_claim":"we propose HetScene, a heterogeneous two-stage generation framework that decouples indoor layout synthesis into Structural Layout Generation (SLG) and Contextual Layout Generation (CLG). SLG first generates globally coherent structural layouts with only primary objects conditioned on text descriptions, top-down binary room masks, and spatial relation graphs.","one_line_summary":"HetScene proposes a two-stage heterogeneous diffusion framework that decomposes scenes into primary structural objects and secondary contextual objects to generate denser, more plausible indoor layouts.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That objects can be reliably decomposed into primary and secondary categories based on distinct roles in shaping a scene, and that this decomposition will resolve issues with dense arrangements and complex spatial dependencies.","pith_extraction_headline":"Decomposing objects into primary structural roles and secondary contextual roles enables a two-stage diffusion process that generates coherent dense indoor scenes."},"references":{"count":30,"sample":[{"doi":"10.1145/2185520.2185552","year":2012,"title":"Versatile rigid- fluid coupling for incompressible SPH.ACM Trans","work_id":"d44ce9d2-061d-4a53-85e5-983c31143b76","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.48550/arxiv.1808.08473","year":2018,"title":"Human-centric Indoor Scene Synthesis Using Stochastic Grammar","work_id":"999f1473-2443-42c1-8076-19b2ea7fd8cb","ref_index":2,"cited_arxiv_id":"1808.08473","is_internal_anchor":true},{"doi":"10.48550/arxiv.2406.11824","year":2024,"title":"2024.doi:10.48550/arXiv.2406.11824","work_id":"ee8b57d1-47b1-47f6-99e1-614068e80ba3","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1145/2010324","year":2011,"title":"Make It Home: Automatic Optimization of Furniture Arrangement","work_id":"d644b210-3d39-433e-8dc6-76b49f2ac0d9","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1145/1964921","year":2011,"title":"Interactive Furniture Layout Using Interior Design Guidelines","work_id":"422b0ff0-ba7e-4545-84b7-568adee17dae","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":30,"snapshot_sha256":"d307954cf0a1644217d1c489b2f9d61bac716674eaf19527652edb599ca96b03","internal_anchors":4},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.13586","created_at":"2026-05-18T02:44:23.156079+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.13586v1","created_at":"2026-05-18T02:44:23.156079+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13586","created_at":"2026-05-18T02:44:23.156079+00:00"},{"alias_kind":"pith_short_12","alias_value":"2CIW7WAAOYMS","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"2CIW7WAAOYMSWMBI","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"2CIW7WAA","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7","json":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7.json","graph_json":"https://pith.science/api/pith-number/2CIW7WAAOYMSWMBIKFILOJ2CZ7/graph.json","events_json":"https://pith.science/api/pith-number/2CIW7WAAOYMSWMBIKFILOJ2CZ7/events.json","paper":"https://pith.science/paper/2CIW7WAA"},"agent_actions":{"view_html":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7","download_json":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7.json","view_paper":"https://pith.science/paper/2CIW7WAA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.13586&json=true","fetch_graph":"https://pith.science/api/pith-number/2CIW7WAAOYMSWMBIKFILOJ2CZ7/graph.json","fetch_events":"https://pith.science/api/pith-number/2CIW7WAAOYMSWMBIKFILOJ2CZ7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7/action/storage_attestation","attest_author":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7/action/author_attestation","sign_citation":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7/action/citation_signature","submit_replication":"https://pith.science/pith/2CIW7WAAOYMSWMBIKFILOJ2CZ7/action/replication_record"}},"created_at":"2026-05-18T02:44:23.156079+00:00","updated_at":"2026-05-18T02:44:23.156079+00:00"}