{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:PLLONZYPVDQ4BBZFCBNN3EB4NV","short_pith_number":"pith:PLLONZYP","schema_version":"1.0","canonical_sha256":"7ad6e6e70fa8e1c08725105add903c6d7be889cc5781c2cfb03f7bd7f4bb3123","source":{"kind":"arxiv","id":"2604.01001","version":2},"attestation_state":"computed","paper":{"title":"EgoSim: Egocentric World Simulator for Embodied Interaction Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Hongrui Zhu, Jiafei Cao, Jiangmiao Pang, Jinkun Hao, Lizhuang Ma, Mingda Jia, Ran Yi, Ruiyan Wang, Xihui Liu, Xudong Xu","submitted_at":"2026-04-01T15:00:46Z","abstract_excerpt":"We introduce EgoSim, a closed-loop egocentric world simulator that generates spatially consistent interaction videos and persistently updates the underlying 3D scene state for continuous simulation. Existing egocentric simulators either lack explicit 3D grounding, causing structural drift under viewpoint changes, or treat the scene as static, failing to update world states across multi-stage interactions. EgoSim addresses both limitations by modeling 3D scenes as updatable world states. We generate embodiment interactions via a Geometry-action-aware Observation Simulation model, with spatial c"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2604.01001","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-04-01T15:00:46Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"7a42f5fdb9c07035c41412a830bbcb03f839bcbf44cdb69e8a10959641ebea5d","abstract_canon_sha256":"293ea0f1510218f3c29e656d4c164cd8396dc45ae265d8b253f9356f2af00b20"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T01:17:30.284502Z","signature_b64":"GxUJxwwMM5BYa1RgzKvtIKSEpVk2Wg9+wrGjK/8mg9cpLtOudDGpBURLE3Bs5TlKOL3a/u9fc39fA5sV8v62AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7ad6e6e70fa8e1c08725105add903c6d7be889cc5781c2cfb03f7bd7f4bb3123","last_reissued_at":"2026-07-02T01:17:30.283970Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T01:17:30.283970Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"EgoSim: Egocentric World Simulator for Embodied Interaction Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Hongrui Zhu, Jiafei Cao, Jiangmiao Pang, Jinkun Hao, Lizhuang Ma, Mingda Jia, Ran Yi, Ruiyan Wang, Xihui Liu, Xudong Xu","submitted_at":"2026-04-01T15:00:46Z","abstract_excerpt":"We introduce EgoSim, a closed-loop egocentric world simulator that generates spatially consistent interaction videos and persistently updates the underlying 3D scene state for continuous simulation. Existing egocentric simulators either lack explicit 3D grounding, causing structural drift under viewpoint changes, or treat the scene as static, failing to update world states across multi-stage interactions. EgoSim addresses both limitations by modeling 3D scenes as updatable world states. We generate embodiment interactions via a Geometry-action-aware Observation Simulation model, with spatial c"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.01001","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.01001/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.01001","created_at":"2026-07-02T01:17:30.284015+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.01001v2","created_at":"2026-07-02T01:17:30.284015+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.01001","created_at":"2026-07-02T01:17:30.284015+00:00"},{"alias_kind":"pith_short_12","alias_value":"PLLONZYPVDQ4","created_at":"2026-07-02T01:17:30.284015+00:00"},{"alias_kind":"pith_short_16","alias_value":"PLLONZYPVDQ4BBZF","created_at":"2026-07-02T01:17:30.284015+00:00"},{"alias_kind":"pith_short_8","alias_value":"PLLONZYP","created_at":"2026-07-02T01:17:30.284015+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"2607.02075","citing_title":"HandsOnWorld: Unconstrained Egocentric Video Generation with Camera-Disentangled Hand Control","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2606.07326","citing_title":"AnchorWorld: Embodied Egocentric World Simulation with View-based Evolution Customization","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18214","citing_title":"EgoInteract: Synthetic Egocentric Videos Generation for Interaction Understanding and Anticipation","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18214","citing_title":"EgoInteract: Synthetic Egocentric Videos Generation for Interaction Understanding and Anticipation","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19728","citing_title":"Aero-World: Action-Conditioned Aerial Video Generation from Inertial Controls","ref_index":13,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV","json":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV.json","graph_json":"https://pith.science/api/pith-number/PLLONZYPVDQ4BBZFCBNN3EB4NV/graph.json","events_json":"https://pith.science/api/pith-number/PLLONZYPVDQ4BBZFCBNN3EB4NV/events.json","paper":"https://pith.science/paper/PLLONZYP"},"agent_actions":{"view_html":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV","download_json":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV.json","view_paper":"https://pith.science/paper/PLLONZYP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.01001&json=true","fetch_graph":"https://pith.science/api/pith-number/PLLONZYPVDQ4BBZFCBNN3EB4NV/graph.json","fetch_events":"https://pith.science/api/pith-number/PLLONZYPVDQ4BBZFCBNN3EB4NV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV/action/storage_attestation","attest_author":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV/action/author_attestation","sign_citation":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV/action/citation_signature","submit_replication":"https://pith.science/pith/PLLONZYPVDQ4BBZFCBNN3EB4NV/action/replication_record"}},"created_at":"2026-07-02T01:17:30.284015+00:00","updated_at":"2026-07-02T01:17:30.284015+00:00"}