{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:XUQURLBNPFXHSCFUI44SRDVDTQ","short_pith_number":"pith:XUQURLBN","canonical_record":{"source":{"id":"2512.00883","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2025-11-30T13:11:56Z","cross_cats_sorted":["cs.CV","cs.SD"],"title_canon_sha256":"2adb5fca45fdd4780fbf4e97b4ace3bf9c69230f3a62214b442b2e0c38d78a59","abstract_canon_sha256":"d61a909fd1d09b6a9a9a85fe6f3a2309a2d7d4d3987e7bc98023a63139ec566c"},"schema_version":"1.0"},"canonical_sha256":"bd2148ac2d796e7908b44739288ea39c0c10298d054e47e7d8c347323274d846","source":{"kind":"arxiv","id":"2512.00883","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.00883","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"arxiv_version","alias_value":"2512.00883v3","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.00883","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"pith_short_12","alias_value":"XUQURLBNPFXH","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"pith_short_16","alias_value":"XUQURLBNPFXHSCFU","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"pith_short_8","alias_value":"XUQURLBN","created_at":"2026-06-08T01:03:51Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:XUQURLBNPFXHSCFUI44SRDVDTQ","target":"record","payload":{"canonical_record":{"source":{"id":"2512.00883","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2025-11-30T13:11:56Z","cross_cats_sorted":["cs.CV","cs.SD"],"title_canon_sha256":"2adb5fca45fdd4780fbf4e97b4ace3bf9c69230f3a62214b442b2e0c38d78a59","abstract_canon_sha256":"d61a909fd1d09b6a9a9a85fe6f3a2309a2d7d4d3987e7bc98023a63139ec566c"},"schema_version":"1.0"},"canonical_sha256":"bd2148ac2d796e7908b44739288ea39c0c10298d054e47e7d8c347323274d846","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T01:03:51.962517Z","signature_b64":"DG9a4g93QpH/F5Echxv+OnVyUwHjGxSitcvcWRKAz3NBLMDLeJrPp989OEF6Hj0RVQoB7hliPeGW2pQ9cDYVBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bd2148ac2d796e7908b44739288ea39c0c10298d054e47e7d8c347323274d846","last_reissued_at":"2026-06-08T01:03:51.961602Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T01:03:51.961602Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2512.00883","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-08T01:03:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vxTYqj3a0X1sL0g6dwKxqd5z6WLH3F67V304guK5LUGqiw7jfw23MmJ3+D4lvUi7xPz2Aj1OWnzFQvhnKngEAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T22:05:57.066919Z"},"content_sha256":"29c6be49433e7178ee8a25c5ce0629b827b7f7079f7ceef6eaccedae83b2f7e7","schema_version":"1.0","event_id":"sha256:29c6be49433e7178ee8a25c5ce0629b827b7f7079f7ceef6eaccedae83b2f7e7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:XUQURLBNPFXHSCFUI44SRDVDTQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Audio-Visual World Models: Grounding Multisensory Imagination for Embodied Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.SD"],"primary_cat":"cs.MM","authors_text":"Jiahua Wang, Jialong Wu, Leqi Zheng, Shijie Cheng, Yaoxin Mao","submitted_at":"2025-11-30T13:11:56Z","abstract_excerpt":"World models simulate environmental dynamics to enable agents to plan and reason about future states. While existing approaches have primarily focused on visual observations, real-world perception inherently involves multiple sensory modalities. Audio provides crucial spatial and temporal cues such as sound source localization and acoustic scene properties, yet its integration into world models remains relatively underexplored. Prior work has not established a commonly adopted formulation for audio-visual world modeling under low-level action control or clarified how to jointly capture physica"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.00883","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.00883/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-08T01:03:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rJgdFvBb4s87aEeZIWe4XZHhNXBsjhh7cylCM2anPtfLjCtFh8OXwADupvMNovQMIr9t3EL+bMtIQRVFUTTbCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T22:05:57.067511Z"},"content_sha256":"307ee76b3d7131b6c26a4715d17e3fdc9022b225fec63a7546f72d0867d9dc63","schema_version":"1.0","event_id":"sha256:307ee76b3d7131b6c26a4715d17e3fdc9022b225fec63a7546f72d0867d9dc63"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XUQURLBNPFXHSCFUI44SRDVDTQ/bundle.json","state_url":"https://pith.science/pith/XUQURLBNPFXHSCFUI44SRDVDTQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XUQURLBNPFXHSCFUI44SRDVDTQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T22:05:57Z","links":{"resolver":"https://pith.science/pith/XUQURLBNPFXHSCFUI44SRDVDTQ","bundle":"https://pith.science/pith/XUQURLBNPFXHSCFUI44SRDVDTQ/bundle.json","state":"https://pith.science/pith/XUQURLBNPFXHSCFUI44SRDVDTQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XUQURLBNPFXHSCFUI44SRDVDTQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:XUQURLBNPFXHSCFUI44SRDVDTQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d61a909fd1d09b6a9a9a85fe6f3a2309a2d7d4d3987e7bc98023a63139ec566c","cross_cats_sorted":["cs.CV","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2025-11-30T13:11:56Z","title_canon_sha256":"2adb5fca45fdd4780fbf4e97b4ace3bf9c69230f3a62214b442b2e0c38d78a59"},"schema_version":"1.0","source":{"id":"2512.00883","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.00883","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"arxiv_version","alias_value":"2512.00883v3","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.00883","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"pith_short_12","alias_value":"XUQURLBNPFXH","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"pith_short_16","alias_value":"XUQURLBNPFXHSCFU","created_at":"2026-06-08T01:03:51Z"},{"alias_kind":"pith_short_8","alias_value":"XUQURLBN","created_at":"2026-06-08T01:03:51Z"}],"graph_snapshots":[{"event_id":"sha256:307ee76b3d7131b6c26a4715d17e3fdc9022b225fec63a7546f72d0867d9dc63","target":"graph","created_at":"2026-06-08T01:03:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.00883/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"World models simulate environmental dynamics to enable agents to plan and reason about future states. While existing approaches have primarily focused on visual observations, real-world perception inherently involves multiple sensory modalities. Audio provides crucial spatial and temporal cues such as sound source localization and acoustic scene properties, yet its integration into world models remains relatively underexplored. Prior work has not established a commonly adopted formulation for audio-visual world modeling under low-level action control or clarified how to jointly capture physica","authors_text":"Jiahua Wang, Jialong Wu, Leqi Zheng, Shijie Cheng, Yaoxin Mao","cross_cats":["cs.CV","cs.SD"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2025-11-30T13:11:56Z","title":"Audio-Visual World Models: Grounding Multisensory Imagination for Embodied Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.00883","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:29c6be49433e7178ee8a25c5ce0629b827b7f7079f7ceef6eaccedae83b2f7e7","target":"record","created_at":"2026-06-08T01:03:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d61a909fd1d09b6a9a9a85fe6f3a2309a2d7d4d3987e7bc98023a63139ec566c","cross_cats_sorted":["cs.CV","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MM","submitted_at":"2025-11-30T13:11:56Z","title_canon_sha256":"2adb5fca45fdd4780fbf4e97b4ace3bf9c69230f3a62214b442b2e0c38d78a59"},"schema_version":"1.0","source":{"id":"2512.00883","kind":"arxiv","version":3}},"canonical_sha256":"bd2148ac2d796e7908b44739288ea39c0c10298d054e47e7d8c347323274d846","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bd2148ac2d796e7908b44739288ea39c0c10298d054e47e7d8c347323274d846","first_computed_at":"2026-06-08T01:03:51.961602Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:03:51.961602Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"DG9a4g93QpH/F5Echxv+OnVyUwHjGxSitcvcWRKAz3NBLMDLeJrPp989OEF6Hj0RVQoB7hliPeGW2pQ9cDYVBg==","signature_status":"signed_v1","signed_at":"2026-06-08T01:03:51.962517Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.00883","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:29c6be49433e7178ee8a25c5ce0629b827b7f7079f7ceef6eaccedae83b2f7e7","sha256:307ee76b3d7131b6c26a4715d17e3fdc9022b225fec63a7546f72d0867d9dc63"],"state_sha256":"5c011d5909b272140d03b887063743107d9de75c1ccc87ad295da26a6ab23014"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ci2V06B1GVxN6fSEvpw99wHLQg3TgD5mVefDCKhYwf01SjF/nGNEA0F0aCETu9904sQ4I77m7Ws/TLsE/qlNDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T22:05:57.069987Z","bundle_sha256":"a2907f0d614703808490ebb7377300ba0818d78a99fa253956546b56391be73e"}}