{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:NYUL3JEUC4R5KGIB3A2D6NQVFY","short_pith_number":"pith:NYUL3JEU","schema_version":"1.0","canonical_sha256":"6e28bda4941723d51901d8343f36152e326d5b7d52bb59314e0cceb0e9fa1b41","source":{"kind":"arxiv","id":"2510.16732","version":3},"attestation_state":"computed","paper":{"title":"A Comprehensive Survey on World Models for Embodied AI","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Le Zhang, Min Wu, Xiaoli Li, Xin He, Xinqing Li, Yun Liu","submitted_at":"2025-10-19T07:12:32Z","abstract_excerpt":"Embodied AI requires agents that perceive, act, and anticipate how actions reshape future world states. World models serve as internal simulators that capture environment dynamics, enabling forward and counterfactual rollouts to support perception, prediction, and decision making. This survey presents a unified framework for world models in embodied AI. Specifically, we formalize the problem setting and learning objectives, and propose a three-axis taxonomy encompassing: (1) Functionality, Decision-Coupled vs. General-Purpose; (2) Temporal Modeling, Sequential Simulation and Inference vs. Glob"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.16732","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-10-19T07:12:32Z","cross_cats_sorted":[],"title_canon_sha256":"db75d83b7d2bfa5b30ffb2948a288b784405acf326e77bcaf145ec40d26a40ba","abstract_canon_sha256":"4fa636bdbf0942bddb839f009a27196fd5adb1aee0126473060c603959b647df"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-29T00:14:00.552818Z","signature_b64":"CRm2BcC4Jdg126DSWarhnMTDVmZrMEpSiNRDbzh1Xq4ssLcaW3Ce61xYm+4D5KqisKKgdfycY2k9cu+elw3SAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6e28bda4941723d51901d8343f36152e326d5b7d52bb59314e0cceb0e9fa1b41","last_reissued_at":"2026-06-29T00:14:00.552344Z","signature_status":"signed_v1","first_computed_at":"2026-06-29T00:14:00.552344Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Comprehensive Survey on World Models for Embodied AI","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Le Zhang, Min Wu, Xiaoli Li, Xin He, Xinqing Li, Yun Liu","submitted_at":"2025-10-19T07:12:32Z","abstract_excerpt":"Embodied AI requires agents that perceive, act, and anticipate how actions reshape future world states. World models serve as internal simulators that capture environment dynamics, enabling forward and counterfactual rollouts to support perception, prediction, and decision making. This survey presents a unified framework for world models in embodied AI. Specifically, we formalize the problem setting and learning objectives, and propose a three-axis taxonomy encompassing: (1) Functionality, Decision-Coupled vs. General-Purpose; (2) Temporal Modeling, Sequential Simulation and Inference vs. Glob"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.16732","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.16732/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.16732","created_at":"2026-06-29T00:14:00.552402+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.16732v3","created_at":"2026-06-29T00:14:00.552402+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.16732","created_at":"2026-06-29T00:14:00.552402+00:00"},{"alias_kind":"pith_short_12","alias_value":"NYUL3JEUC4R5","created_at":"2026-06-29T00:14:00.552402+00:00"},{"alias_kind":"pith_short_16","alias_value":"NYUL3JEUC4R5KGIB","created_at":"2026-06-29T00:14:00.552402+00:00"},{"alias_kind":"pith_short_8","alias_value":"NYUL3JEU","created_at":"2026-06-29T00:14:00.552402+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":18,"internal_anchor_count":18,"sample":[{"citing_arxiv_id":"2606.00133","citing_title":"World Models: A Comprehensive Survey of Architectures, Methodologies, Reasoning Paradigms, and Applications","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.23856","citing_title":"Point Tracking Improves World Action Models","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17912","citing_title":"WorldArena 2.0: Extending Embodied World Model Benchmarking on Modality, Functionality and Platform","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19242","citing_title":"PhyWorld: Physics-Faithful World Model for Video Generation","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2602.11075","citing_title":"RISE: Self-Improving Robot Policy with Compositional World Model","ref_index":57,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02900","citing_title":"Safety in Embodied AI: A Survey of Risks, Attacks, and Defenses","ref_index":205,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10426","citing_title":"CoWorld-VLA: Thinking in a Multi-Expert World Model for Autonomous Driving","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2604.01346","citing_title":"Safety, Security, and Cognitive Risks in World Models","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12090","citing_title":"World Action Models: The Next Frontier in Embodied AI","ref_index":274,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11550","citing_title":"The DAWN of World-Action Interactive Models","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2604.26848","citing_title":"STARRY: Spatial-Temporal Action-Centric World Modeling for Robotic Manipulation","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10426","citing_title":"CoWorld-VLA: Thinking in a Multi-Expert World Model for Autonomous Driving","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2604.22748","citing_title":"Agentic World Modeling: Foundations, Capabilities, Laws, and Beyond","ref_index":227,"is_internal_anchor":true},{"citing_arxiv_id":"2604.11351","citing_title":"WM-DAgger: Enabling Efficient Data Aggregation for Imitation Learning with World Models","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08209","citing_title":"OmniJigsaw: Enhancing Omni-Modal Reasoning via Modality-Orchestrated Reordering","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07392","citing_title":"Event-Centric World Modeling with Memory-Augmented Retrieval for Embodied Decision-Making","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04707","citing_title":"OpenWorldLib: A Unified Codebase and Definition of Advanced World Models","ref_index":70,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16592","citing_title":"Human Cognition in Machines: A Unified Perspective of World Models","ref_index":101,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY","json":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY.json","graph_json":"https://pith.science/api/pith-number/NYUL3JEUC4R5KGIB3A2D6NQVFY/graph.json","events_json":"https://pith.science/api/pith-number/NYUL3JEUC4R5KGIB3A2D6NQVFY/events.json","paper":"https://pith.science/paper/NYUL3JEU"},"agent_actions":{"view_html":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY","download_json":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY.json","view_paper":"https://pith.science/paper/NYUL3JEU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.16732&json=true","fetch_graph":"https://pith.science/api/pith-number/NYUL3JEUC4R5KGIB3A2D6NQVFY/graph.json","fetch_events":"https://pith.science/api/pith-number/NYUL3JEUC4R5KGIB3A2D6NQVFY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY/action/storage_attestation","attest_author":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY/action/author_attestation","sign_citation":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY/action/citation_signature","submit_replication":"https://pith.science/pith/NYUL3JEUC4R5KGIB3A2D6NQVFY/action/replication_record"}},"created_at":"2026-06-29T00:14:00.552402+00:00","updated_at":"2026-06-29T00:14:00.552402+00:00"}