{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:IYEC77GQ2ZRZA2AKYWKOWTKIDB","short_pith_number":"pith:IYEC77GQ","canonical_record":{"source":{"id":"2606.14752","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T09:39:39Z","cross_cats_sorted":["cs.AI","cs.LG","cs.RO"],"title_canon_sha256":"0e32000ec000e5c94c481fe3a1d649a31851463f7c7e2f6c6a88c6179b90e329","abstract_canon_sha256":"7a166cc9995932adf9760210f16526a946b4c72e538d6c48f29df2012a1c567d"},"schema_version":"1.0"},"canonical_sha256":"46082ffcd0d66390680ac594eb4d48185c9b454e768f1a097b45013f867e9b14","source":{"kind":"arxiv","id":"2606.14752","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.14752","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"arxiv_version","alias_value":"2606.14752v2","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.14752","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"pith_short_12","alias_value":"IYEC77GQ2ZRZ","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"pith_short_16","alias_value":"IYEC77GQ2ZRZA2AK","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"pith_short_8","alias_value":"IYEC77GQ","created_at":"2026-06-30T01:17:41Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:IYEC77GQ2ZRZA2AKYWKOWTKIDB","target":"record","payload":{"canonical_record":{"source":{"id":"2606.14752","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T09:39:39Z","cross_cats_sorted":["cs.AI","cs.LG","cs.RO"],"title_canon_sha256":"0e32000ec000e5c94c481fe3a1d649a31851463f7c7e2f6c6a88c6179b90e329","abstract_canon_sha256":"7a166cc9995932adf9760210f16526a946b4c72e538d6c48f29df2012a1c567d"},"schema_version":"1.0"},"canonical_sha256":"46082ffcd0d66390680ac594eb4d48185c9b454e768f1a097b45013f867e9b14","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T01:17:41.672971Z","signature_b64":"kyPdbg0HPUuJT1lbxe+XK1iLf+Bu2KqLl4db6ViLIrF2/0pcC6rU9d7QPtnFIMJuGfPExv0nI3gCKHikMfoDBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"46082ffcd0d66390680ac594eb4d48185c9b454e768f1a097b45013f867e9b14","last_reissued_at":"2026-06-30T01:17:41.672398Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T01:17:41.672398Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.14752","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T01:17:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zozH3ENpSukcKtUHWY+Zhz3RC/NrAvm9JTLhQoY+HN/1UCSEHwGHjjbo7WvMEtN2k/y1oJdBXaUejKzIr61ABg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T15:02:56.870845Z"},"content_sha256":"1432b9e5b00950ec71065e266ade849c10d179970b2fb3ed40697d64edeed70b","schema_version":"1.0","event_id":"sha256:1432b9e5b00950ec71065e266ade849c10d179970b2fb3ed40697d64edeed70b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:IYEC77GQ2ZRZA2AKYWKOWTKIDB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"X-Tokenizer: A Multimodal Action Tokenizer for Vision-Language-Action Pretraining","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.RO"],"primary_cat":"cs.CV","authors_text":"Dongxiu Liu, Hang Su, Hao Wang, Jinliang Zheng, Lights Shi, Lucy Liang, Miracle Kang, Pushi Zhang, Roy Gan, Shawn Qin, Sylas Chen, Xianyuan Zhan, Yinan Zheng","submitted_at":"2026-06-07T09:39:39Z","abstract_excerpt":"Modern Vision-Language-Action (VLA) models must bridge pretrained vision-language reasoning and precise continuous robot control. Existing action tokenizers discretize actions primarily for reconstruction, producing codes that preserve motion geometry but provide only weak semantic supervision to the backbone. We therefore formulate action tokenization not as mere compression, but as semantic interface learning between multimodal reasoning and executable control. To this end, we introduce X-Tokenizer, a lightweight encoder-Semantic Residual Quantization (SRQ)-decoder architecture that provides"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.14752","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.14752/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T01:17:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9kD+3eOE5lR0X6Oe29pl/F9kHYMTezSctHtOhbOzUGc4irax2P+AtFRFVljkufvv1MZ9NZ2K4TPP4vy2QpFXCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T15:02:56.871211Z"},"content_sha256":"ed6bb0df0f3bb918161d5633e7b8a4583d7371e1ffa7bd32acb20af408098847","schema_version":"1.0","event_id":"sha256:ed6bb0df0f3bb918161d5633e7b8a4583d7371e1ffa7bd32acb20af408098847"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IYEC77GQ2ZRZA2AKYWKOWTKIDB/bundle.json","state_url":"https://pith.science/pith/IYEC77GQ2ZRZA2AKYWKOWTKIDB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IYEC77GQ2ZRZA2AKYWKOWTKIDB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T15:02:56Z","links":{"resolver":"https://pith.science/pith/IYEC77GQ2ZRZA2AKYWKOWTKIDB","bundle":"https://pith.science/pith/IYEC77GQ2ZRZA2AKYWKOWTKIDB/bundle.json","state":"https://pith.science/pith/IYEC77GQ2ZRZA2AKYWKOWTKIDB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IYEC77GQ2ZRZA2AKYWKOWTKIDB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:IYEC77GQ2ZRZA2AKYWKOWTKIDB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7a166cc9995932adf9760210f16526a946b4c72e538d6c48f29df2012a1c567d","cross_cats_sorted":["cs.AI","cs.LG","cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T09:39:39Z","title_canon_sha256":"0e32000ec000e5c94c481fe3a1d649a31851463f7c7e2f6c6a88c6179b90e329"},"schema_version":"1.0","source":{"id":"2606.14752","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.14752","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"arxiv_version","alias_value":"2606.14752v2","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.14752","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"pith_short_12","alias_value":"IYEC77GQ2ZRZ","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"pith_short_16","alias_value":"IYEC77GQ2ZRZA2AK","created_at":"2026-06-30T01:17:41Z"},{"alias_kind":"pith_short_8","alias_value":"IYEC77GQ","created_at":"2026-06-30T01:17:41Z"}],"graph_snapshots":[{"event_id":"sha256:ed6bb0df0f3bb918161d5633e7b8a4583d7371e1ffa7bd32acb20af408098847","target":"graph","created_at":"2026-06-30T01:17:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.14752/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Modern Vision-Language-Action (VLA) models must bridge pretrained vision-language reasoning and precise continuous robot control. Existing action tokenizers discretize actions primarily for reconstruction, producing codes that preserve motion geometry but provide only weak semantic supervision to the backbone. We therefore formulate action tokenization not as mere compression, but as semantic interface learning between multimodal reasoning and executable control. To this end, we introduce X-Tokenizer, a lightweight encoder-Semantic Residual Quantization (SRQ)-decoder architecture that provides","authors_text":"Dongxiu Liu, Hang Su, Hao Wang, Jinliang Zheng, Lights Shi, Lucy Liang, Miracle Kang, Pushi Zhang, Roy Gan, Shawn Qin, Sylas Chen, Xianyuan Zhan, Yinan Zheng","cross_cats":["cs.AI","cs.LG","cs.RO"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T09:39:39Z","title":"X-Tokenizer: A Multimodal Action Tokenizer for Vision-Language-Action Pretraining"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.14752","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1432b9e5b00950ec71065e266ade849c10d179970b2fb3ed40697d64edeed70b","target":"record","created_at":"2026-06-30T01:17:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7a166cc9995932adf9760210f16526a946b4c72e538d6c48f29df2012a1c567d","cross_cats_sorted":["cs.AI","cs.LG","cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T09:39:39Z","title_canon_sha256":"0e32000ec000e5c94c481fe3a1d649a31851463f7c7e2f6c6a88c6179b90e329"},"schema_version":"1.0","source":{"id":"2606.14752","kind":"arxiv","version":2}},"canonical_sha256":"46082ffcd0d66390680ac594eb4d48185c9b454e768f1a097b45013f867e9b14","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"46082ffcd0d66390680ac594eb4d48185c9b454e768f1a097b45013f867e9b14","first_computed_at":"2026-06-30T01:17:41.672398Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-30T01:17:41.672398Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"kyPdbg0HPUuJT1lbxe+XK1iLf+Bu2KqLl4db6ViLIrF2/0pcC6rU9d7QPtnFIMJuGfPExv0nI3gCKHikMfoDBw==","signature_status":"signed_v1","signed_at":"2026-06-30T01:17:41.672971Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.14752","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1432b9e5b00950ec71065e266ade849c10d179970b2fb3ed40697d64edeed70b","sha256:ed6bb0df0f3bb918161d5633e7b8a4583d7371e1ffa7bd32acb20af408098847"],"state_sha256":"073e4b97a235e48f25669228ac97dc60d51df94ffcfd8bce676ace30dcfd0931"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"toMTD33feEtVbQNcri9SaQ0Khj/nZkaSKzG2z5fhMkP9Dj9hKENb5PEpPc6Uu4Wv4mMKEoRYUWNF+tmuK2V7Dw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T15:02:56.873143Z","bundle_sha256":"07a990be41f0b9a9d0198d7104144019ee1609b71b7f5b9caf11cc9cd1b576e6"}}