{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ARBVRGABDDJDHDYL6XZYPZOGR5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1a6cd3e03af084f48ffdcf50881a65d5b864e4710fcdab89673d7dee917eb1e0","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T09:51:25Z","title_canon_sha256":"e0b721fc0e39ea961a058494ddb8bad376eb683673fecc50f0047a6bf7c25ad2"},"schema_version":"1.0","source":{"id":"2606.07100","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07100","created_at":"2026-06-08T01:04:46Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07100v1","created_at":"2026-06-08T01:04:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07100","created_at":"2026-06-08T01:04:46Z"},{"alias_kind":"pith_short_12","alias_value":"ARBVRGABDDJD","created_at":"2026-06-08T01:04:46Z"},{"alias_kind":"pith_short_16","alias_value":"ARBVRGABDDJDHDYL","created_at":"2026-06-08T01:04:46Z"},{"alias_kind":"pith_short_8","alias_value":"ARBVRGAB","created_at":"2026-06-08T01:04:46Z"}],"graph_snapshots":[{"event_id":"sha256:e25bc27e62e154ed1fc831fc696cc900c90eea14986a30ff20c60e67014f9ee6","target":"graph","created_at":"2026-06-08T01:04:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.07100/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Visual-language action (VLA) models enable robots to predict actions directly from observations and language instructions, but their performance depends on large-scale, high-quality data and is limited by the scarcity of real-world robot action datasets. To facilitate VLA model learning with abundant unlabeled human videos, Latent Action Models (LAM) learn latent action representations from visual dynamics to provide additional supervision for VLA learning. However, LAM and VLA are typically trained separately, leaving LAM ungrounded during VLA training and VLA models constrained by frozen LAM","authors_text":"Baoxiong Jia, Jiangyong Huang, Jingze Zhang, Mengya Liu, Siyuan Huang","cross_cats":["cs.RO"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T09:51:25Z","title":"LARA: Latent Action Representation Alignment for Vision-Language-Action Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07100","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2a727ae84109d019239c5fc955944db4d5d5b9959317056193dd0d7b8b34fdb4","target":"record","created_at":"2026-06-08T01:04:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1a6cd3e03af084f48ffdcf50881a65d5b864e4710fcdab89673d7dee917eb1e0","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T09:51:25Z","title_canon_sha256":"e0b721fc0e39ea961a058494ddb8bad376eb683673fecc50f0047a6bf7c25ad2"},"schema_version":"1.0","source":{"id":"2606.07100","kind":"arxiv","version":1}},"canonical_sha256":"044358980118d2338f0bf5f387e5c68f4aa00d1578e52591d79c61ef64933dc1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"044358980118d2338f0bf5f387e5c68f4aa00d1578e52591d79c61ef64933dc1","first_computed_at":"2026-06-08T01:04:46.202736Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:04:46.202736Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8RQHQy5eBvsJWQDvyEPQZGSnhv1RORXYHqwjjkpvmUlZEBFElIQR/Vg0DPv9yDAeXHV0xJMj2KLiBByf4qhuDg==","signature_status":"signed_v1","signed_at":"2026-06-08T01:04:46.203485Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.07100","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2a727ae84109d019239c5fc955944db4d5d5b9959317056193dd0d7b8b34fdb4","sha256:e25bc27e62e154ed1fc831fc696cc900c90eea14986a30ff20c60e67014f9ee6"],"state_sha256":"552535f14be606667d7565bfb08d369446d333143bdd42e3e2e4380a13bcd8c7"}