{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AWPT3FGY5YFR4HY7OWF3VRO2S7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"43c272f046a1cec2dd6b01d882b9bd9af69370d0a4c9c80edcd1bdccd81a8f51","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T17:57:06Z","title_canon_sha256":"f1b5308c2d711e4f0f9a8dff7dc93511f9906c78a05f4f710142c9d0a738b444"},"schema_version":"1.0","source":{"id":"2606.12396","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.12396","created_at":"2026-06-11T02:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2606.12396v1","created_at":"2026-06-11T02:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.12396","created_at":"2026-06-11T02:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"AWPT3FGY5YFR","created_at":"2026-06-11T02:09:49Z"},{"alias_kind":"pith_short_16","alias_value":"AWPT3FGY5YFR4HY7","created_at":"2026-06-11T02:09:49Z"},{"alias_kind":"pith_short_8","alias_value":"AWPT3FGY","created_at":"2026-06-11T02:09:49Z"}],"graph_snapshots":[{"event_id":"sha256:811a911a284e92c51d706d3ec820d4a77b30112d396bf36d592b670ee601cab3","target":"graph","created_at":"2026-06-11T02:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.12396/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Vision-language-action (VLA) models can describe scenes and reason about them in language, yet still struggle to ground their actions in the dense 3D world around them. Existing approaches either inject features from a frozen 3D foundation model without an objective that ensures the policy uses them, or constrain geometry with sparse box and map losses that provide no dense spatial signal. We introduce VLGA, the first vision-language-action model supervised to reconstruct the dense 3D world it drives through. VLGA introduces geometry as a fourth modality alongside vision, language, and action ","authors_text":"Burhan Yaman, Danhua Guo, Dhruva Dixith Kurra, Jin Yao, Tom Lampo, Zezhou Cheng","cross_cats":["cs.RO"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T17:57:06Z","title":"VLGA: Vision-Language-Geometry-Action Models for Autonomous Driving"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.12396","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:845b3a4732d923fbdb80ab0725611ca591ba84fbec8fc9e29e5a3db2cb477591","target":"record","created_at":"2026-06-11T02:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"43c272f046a1cec2dd6b01d882b9bd9af69370d0a4c9c80edcd1bdccd81a8f51","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T17:57:06Z","title_canon_sha256":"f1b5308c2d711e4f0f9a8dff7dc93511f9906c78a05f4f710142c9d0a738b444"},"schema_version":"1.0","source":{"id":"2606.12396","kind":"arxiv","version":1}},"canonical_sha256":"059f3d94d8ee0b1e1f1f758bbac5da97d880419d3be79dd9012ea419c4c6339b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"059f3d94d8ee0b1e1f1f758bbac5da97d880419d3be79dd9012ea419c4c6339b","first_computed_at":"2026-06-11T02:09:49.700411Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-11T02:09:49.700411Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0En8Ng5hiX7HVvvgAkZ0zXMOdx7GVpiFYJLk9iJPs+2pZcolKeb2U1T3RPX1Ake1J3YCJOAFYGRKzq9+uPhTAw==","signature_status":"signed_v1","signed_at":"2026-06-11T02:09:49.700812Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.12396","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:845b3a4732d923fbdb80ab0725611ca591ba84fbec8fc9e29e5a3db2cb477591","sha256:811a911a284e92c51d706d3ec820d4a77b30112d396bf36d592b670ee601cab3"],"state_sha256":"e85b9cb16d2f4d43d8f0b40be2ba04a10dcc83672090c48426a96686b5318f96"}