{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:NHBJR2KXA255RVHLGMI4YP3WFF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9688b51dcfa062741ceb16cf39c9a5e81aa0f2f51183c680753a05c99b8dd0d2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-18T04:24:28Z","title_canon_sha256":"bc2b9153ec50675d219e8fb02393da75165e6be4fcf4dee2cdae74790213c263"},"schema_version":"1.0","source":{"id":"2606.19776","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.19776","created_at":"2026-06-19T16:12:34Z"},{"alias_kind":"arxiv_version","alias_value":"2606.19776v1","created_at":"2026-06-19T16:12:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19776","created_at":"2026-06-19T16:12:34Z"},{"alias_kind":"pith_short_12","alias_value":"NHBJR2KXA255","created_at":"2026-06-19T16:12:34Z"},{"alias_kind":"pith_short_16","alias_value":"NHBJR2KXA255RVHL","created_at":"2026-06-19T16:12:34Z"},{"alias_kind":"pith_short_8","alias_value":"NHBJR2KX","created_at":"2026-06-19T16:12:34Z"}],"graph_snapshots":[{"event_id":"sha256:fcc967a863132b74fde2dfbc53b5f204f03577b163567d04ef85c094b8679f76","target":"graph","created_at":"2026-06-19T16:12:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.19776/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recently, vision-language models (VLMs) have made significant progress in 3D scene understanding, driving advances in applications such as embodied intelligence and robotic vision. However, existing approaches typically either rely directly on explicit 3D inputs (e.g., point clouds or RGB-D sequences), or introduce an additional 3D geometry encoder to derive 3D-aware visual tokens from 2D images. Such designs structurally decouple 3D geometric perception from the rich 2D semantics learned via vision-language pre-training, hindering the development of a unified 3D vision-language representation","authors_text":"Jianing Li, Li Du, Yijiang Liu, Zhou Fang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-18T04:24:28Z","title":"Occ-VLM: Occupancy Grounded Vision Language Model for Indoor Scene Understanding"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19776","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:19c61088b0ec2477836d52fffecd860ba4b2d76122c013e3b447d1a7d0e575c2","target":"record","created_at":"2026-06-19T16:12:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9688b51dcfa062741ceb16cf39c9a5e81aa0f2f51183c680753a05c99b8dd0d2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-18T04:24:28Z","title_canon_sha256":"bc2b9153ec50675d219e8fb02393da75165e6be4fcf4dee2cdae74790213c263"},"schema_version":"1.0","source":{"id":"2606.19776","kind":"arxiv","version":1}},"canonical_sha256":"69c298e95706bbd8d4eb3311cc3f7629495bf29a4366bc43c7f90d0fc0a9b58f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"69c298e95706bbd8d4eb3311cc3f7629495bf29a4366bc43c7f90d0fc0a9b58f","first_computed_at":"2026-06-19T16:12:34.861977Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:12:34.861977Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JTX+ZI7FKzhLEWZ5exIdKFrKnHg1MeBLU5rV3Kl/u3cYvW6qPzbtsQE7SBmkWW0mnHPCfY/y2wNQ+zm4dbXpCA==","signature_status":"signed_v1","signed_at":"2026-06-19T16:12:34.862375Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.19776","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:19c61088b0ec2477836d52fffecd860ba4b2d76122c013e3b447d1a7d0e575c2","sha256:fcc967a863132b74fde2dfbc53b5f204f03577b163567d04ef85c094b8679f76"],"state_sha256":"87a14f4dea41c31ab019290deb878627d66f3c1d55d09f0cc94b4abbdcb0992b"}