{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:AUPNHARLMHLDA7RK7LG5N4CMBW","short_pith_number":"pith:AUPNHARL","schema_version":"1.0","canonical_sha256":"051ed3822b61d6307e2afacdd6f04c0dbf3c3a65088a5065d8fd2897bfb73d38","source":{"kind":"arxiv","id":"2606.07304","version":1},"attestation_state":"computed","paper":{"title":"CAPE: Contrastive Action-conditioned Parallel Encoding for Embodied Planning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Cong Chen, Haowen Wang, Pei Ren, Zhengping Che, Zhixiang Zhang","submitted_at":"2026-06-05T14:21:44Z","abstract_excerpt":"Embodied agents need to predict the future consequences of candidate actions in order to plan effectively before execution. Existing visual dynamics models learn by reconstructing future visual states or rolling out dense latent representations, which spreads learning capacity across visually salient but planning-irrelevant content rather than the action-conditioned changes that drive manipulation outcomes. We propose CAPE, a Contrastive Action-conditioned Parallel Encoding framework that learns visual dynamics by distinguishing the future outcomes induced by different action sequences. Given "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.07304","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-05T14:21:44Z","cross_cats_sorted":[],"title_canon_sha256":"d77b4d2048ebcb88af1e906cbac53535215b6668c59834cb296fa743df9cfbb4","abstract_canon_sha256":"1731911d44e88cc04e685fe92ffbca6dbcec031a5891ed6e10134ebefac69707"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T01:05:18.702667Z","signature_b64":"cwr2hBq60Bvoc+mjvSAJ0q8rRZr1jrTIJ26+We1yHKVY+us+qLYyRo3sV8nfzIXj184J5JqZN6TQNfJhVVrNBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"051ed3822b61d6307e2afacdd6f04c0dbf3c3a65088a5065d8fd2897bfb73d38","last_reissued_at":"2026-06-08T01:05:18.701932Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T01:05:18.701932Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CAPE: Contrastive Action-conditioned Parallel Encoding for Embodied Planning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Cong Chen, Haowen Wang, Pei Ren, Zhengping Che, Zhixiang Zhang","submitted_at":"2026-06-05T14:21:44Z","abstract_excerpt":"Embodied agents need to predict the future consequences of candidate actions in order to plan effectively before execution. Existing visual dynamics models learn by reconstructing future visual states or rolling out dense latent representations, which spreads learning capacity across visually salient but planning-irrelevant content rather than the action-conditioned changes that drive manipulation outcomes. We propose CAPE, a Contrastive Action-conditioned Parallel Encoding framework that learns visual dynamics by distinguishing the future outcomes induced by different action sequences. Given "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07304","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.07304/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.07304","created_at":"2026-06-08T01:05:18.702039+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.07304v1","created_at":"2026-06-08T01:05:18.702039+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07304","created_at":"2026-06-08T01:05:18.702039+00:00"},{"alias_kind":"pith_short_12","alias_value":"AUPNHARLMHLD","created_at":"2026-06-08T01:05:18.702039+00:00"},{"alias_kind":"pith_short_16","alias_value":"AUPNHARLMHLDA7RK","created_at":"2026-06-08T01:05:18.702039+00:00"},{"alias_kind":"pith_short_8","alias_value":"AUPNHARL","created_at":"2026-06-08T01:05:18.702039+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW","json":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW.json","graph_json":"https://pith.science/api/pith-number/AUPNHARLMHLDA7RK7LG5N4CMBW/graph.json","events_json":"https://pith.science/api/pith-number/AUPNHARLMHLDA7RK7LG5N4CMBW/events.json","paper":"https://pith.science/paper/AUPNHARL"},"agent_actions":{"view_html":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW","download_json":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW.json","view_paper":"https://pith.science/paper/AUPNHARL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.07304&json=true","fetch_graph":"https://pith.science/api/pith-number/AUPNHARLMHLDA7RK7LG5N4CMBW/graph.json","fetch_events":"https://pith.science/api/pith-number/AUPNHARLMHLDA7RK7LG5N4CMBW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW/action/storage_attestation","attest_author":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW/action/author_attestation","sign_citation":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW/action/citation_signature","submit_replication":"https://pith.science/pith/AUPNHARLMHLDA7RK7LG5N4CMBW/action/replication_record"}},"created_at":"2026-06-08T01:05:18.702039+00:00","updated_at":"2026-06-08T01:05:18.702039+00:00"}