{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:IYT27EWKNPMDLIXJZD4Z3KVH5Q","short_pith_number":"pith:IYT27EWK","canonical_record":{"source":{"id":"2605.14110","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T20:53:03Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"ec4506124253a3de3252722be03a18ffc73bbfe1d7525492bb96a4e8bd936298","abstract_canon_sha256":"c737199c220c3db37617317a9be54b30b0602e2b84612c2aba9b7309bec89bf6"},"schema_version":"1.0"},"canonical_sha256":"4627af92ca6bd835a2e9c8f99daaa7ec0b6ec5e0195d9c1215f197c35a9e6ff0","source":{"kind":"arxiv","id":"2605.14110","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14110","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14110v1","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14110","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"pith_short_12","alias_value":"IYT27EWKNPMD","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"IYT27EWKNPMDLIXJ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"IYT27EWK","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:IYT27EWKNPMDLIXJZD4Z3KVH5Q","target":"record","payload":{"canonical_record":{"source":{"id":"2605.14110","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T20:53:03Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"ec4506124253a3de3252722be03a18ffc73bbfe1d7525492bb96a4e8bd936298","abstract_canon_sha256":"c737199c220c3db37617317a9be54b30b0602e2b84612c2aba9b7309bec89bf6"},"schema_version":"1.0"},"canonical_sha256":"4627af92ca6bd835a2e9c8f99daaa7ec0b6ec5e0195d9c1215f197c35a9e6ff0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:12.011852Z","signature_b64":"efrH3niQK41CRWoMQjmJscGKDvvv5kILExQSpR2R/DMKbvXUkk8UQoqGj2pVOpr6ZhotiXx/KlXW1Z6+HznKAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4627af92ca6bd835a2e9c8f99daaa7ec0b6ec5e0195d9c1215f197c35a9e6ff0","last_reissued_at":"2026-05-17T23:39:12.011011Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:12.011011Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.14110","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mCtW5jYND0rjfoW2JzuYJY6m9As//Ynp9Ah2KlFYSUV9L3UBEPKzzHIQ16UVcXUcRA0Llkxhr+ho1QNQKzsQCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T17:53:30.085657Z"},"content_sha256":"b7a86a19f00ecdfedc82db83edffde44f60bbf408b6b332fab51136cbf1de6a8","schema_version":"1.0","event_id":"sha256:b7a86a19f00ecdfedc82db83edffde44f60bbf408b6b332fab51136cbf1de6a8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:IYT27EWKNPMDLIXJZD4Z3KVH5Q","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SToRe3D: Sparse Token Relevance in ViTs for Efficient Multi-View 3D Object Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"SToRe3D prunes ViT tokens and 3D queries via mutual relevance heads to reach 3x faster multi-view detection with only marginal accuracy loss.","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Charles Cossette, Lezhou Feng, Lingting Ge, Sandro Papais","submitted_at":"2026-05-13T20:53:03Z","abstract_excerpt":"Vision Transformers (ViTs) enable strong multi-view 3D detection but are limited by high inference latency from dense token and query processing across multiple views and large 3D regions. Existing sparsity methods, designed mainly for 2D vision, prune or merge image tokens but do not extend to full-model sparsity or address 3D object queries. We introduce SToRe3D, a relevance-aligned sparsity framework that jointly selects 2D image tokens and 3D object queries while storing filtered features for reactivation. Mutual 2D-3D relevance heads allocate compute to driving-critical content and preser"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"SToRe3D achieves up to 3x faster inference with marginal accuracy loss, establishing real-time large-scale ViT-based 3D detection while maintaining accuracy on planning-critical agents.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the mutual 2D-3D relevance heads reliably identify driving-critical content and that storing filtered features does not introduce unacceptable reactivation overhead or accuracy degradation under varying scene conditions.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SToRe3D delivers up to 3x faster inference for multi-view 3D object detection in ViTs by selecting relevant 2D tokens and 3D queries via mutual relevance heads with only marginal accuracy loss.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"SToRe3D prunes ViT tokens and 3D queries via mutual relevance heads to reach 3x faster multi-view detection with only marginal accuracy loss.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"89575a1fc521333e420e975eae122a23134557770e3804e9b8ef5cbb79c23eef"},"source":{"id":"2605.14110","kind":"arxiv","version":1},"verdict":{"id":"a9a451d6-b6dc-4ae1-b351-44cceea859d8","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T05:11:13.393094Z","strongest_claim":"SToRe3D achieves up to 3x faster inference with marginal accuracy loss, establishing real-time large-scale ViT-based 3D detection while maintaining accuracy on planning-critical agents.","one_line_summary":"SToRe3D delivers up to 3x faster inference for multi-view 3D object detection in ViTs by selecting relevant 2D tokens and 3D queries via mutual relevance heads with only marginal accuracy loss.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the mutual 2D-3D relevance heads reliably identify driving-critical content and that storing filtered features does not introduce unacceptable reactivation overhead or accuracy degradation under varying scene conditions.","pith_extraction_headline":"SToRe3D prunes ViT tokens and 3D queries via mutual relevance heads to reach 3x faster multi-view detection with only marginal accuracy loss."},"references":{"count":71,"sample":[{"doi":"","year":null,"title":"GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints","work_id":"b73ad5b2-e553-4c71-b0c9-67e67ba7b158","ref_index":1,"cited_arxiv_id":"2305.13245","is_internal_anchor":true},{"doi":"","year":2023,"title":"Token merging: Your vit but faster","work_id":"3a41079e-d522-4559-bbe1-f4f3c9708895","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"nuscenes: A multi- modal dataset for autonomous driving","work_id":"ce2f77fd-82cb-445f-8a60-028a56ad7c24","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"End- to-end object detection with transformers","work_id":"a8aa5f17-b506-45d3-9e8f-dcb5190a8ea8","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Pointbev: A sparse approach for bev predictions","work_id":"1e32ede3-ce91-499d-8f38-5885efa021b8","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":71,"snapshot_sha256":"9066b5a6c8688609fca94c3045102434600ff1deba7215c02a88274558f36dba","internal_anchors":9},"formal_canon":{"evidence_count":2,"snapshot_sha256":"dd94a082cc7c53bb9058eaf54d2189f8013d7c3ca1b7648e7c9d354c387ba733"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"a9a451d6-b6dc-4ae1-b351-44cceea859d8"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VRG9qNvVg7wSy5QrelxRzqtE/zNx5ybAzF6CNT4LXyncnoC6hsEsG+bsjLEXVQlyMbXz2UwUNCfqJypzW6nzCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T17:53:30.086194Z"},"content_sha256":"6cf98e063340b87d2d91a38cd00a8985b4ab88b010d9323237df0065fa167147","schema_version":"1.0","event_id":"sha256:6cf98e063340b87d2d91a38cd00a8985b4ab88b010d9323237df0065fa167147"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IYT27EWKNPMDLIXJZD4Z3KVH5Q/bundle.json","state_url":"https://pith.science/pith/IYT27EWKNPMDLIXJZD4Z3KVH5Q/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IYT27EWKNPMDLIXJZD4Z3KVH5Q/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T17:53:30Z","links":{"resolver":"https://pith.science/pith/IYT27EWKNPMDLIXJZD4Z3KVH5Q","bundle":"https://pith.science/pith/IYT27EWKNPMDLIXJZD4Z3KVH5Q/bundle.json","state":"https://pith.science/pith/IYT27EWKNPMDLIXJZD4Z3KVH5Q/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IYT27EWKNPMDLIXJZD4Z3KVH5Q/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:IYT27EWKNPMDLIXJZD4Z3KVH5Q","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c737199c220c3db37617317a9be54b30b0602e2b84612c2aba9b7309bec89bf6","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T20:53:03Z","title_canon_sha256":"ec4506124253a3de3252722be03a18ffc73bbfe1d7525492bb96a4e8bd936298"},"schema_version":"1.0","source":{"id":"2605.14110","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14110","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14110v1","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14110","created_at":"2026-05-17T23:39:12Z"},{"alias_kind":"pith_short_12","alias_value":"IYT27EWKNPMD","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"IYT27EWKNPMDLIXJ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"IYT27EWK","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:6cf98e063340b87d2d91a38cd00a8985b4ab88b010d9323237df0065fa167147","target":"graph","created_at":"2026-05-17T23:39:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"SToRe3D achieves up to 3x faster inference with marginal accuracy loss, establishing real-time large-scale ViT-based 3D detection while maintaining accuracy on planning-critical agents."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the mutual 2D-3D relevance heads reliably identify driving-critical content and that storing filtered features does not introduce unacceptable reactivation overhead or accuracy degradation under varying scene conditions."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"SToRe3D delivers up to 3x faster inference for multi-view 3D object detection in ViTs by selecting relevant 2D tokens and 3D queries via mutual relevance heads with only marginal accuracy loss."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"SToRe3D prunes ViT tokens and 3D queries via mutual relevance heads to reach 3x faster multi-view detection with only marginal accuracy loss."}],"snapshot_sha256":"89575a1fc521333e420e975eae122a23134557770e3804e9b8ef5cbb79c23eef"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"dd94a082cc7c53bb9058eaf54d2189f8013d7c3ca1b7648e7c9d354c387ba733"},"paper":{"abstract_excerpt":"Vision Transformers (ViTs) enable strong multi-view 3D detection but are limited by high inference latency from dense token and query processing across multiple views and large 3D regions. Existing sparsity methods, designed mainly for 2D vision, prune or merge image tokens but do not extend to full-model sparsity or address 3D object queries. We introduce SToRe3D, a relevance-aligned sparsity framework that jointly selects 2D image tokens and 3D object queries while storing filtered features for reactivation. Mutual 2D-3D relevance heads allocate compute to driving-critical content and preser","authors_text":"Charles Cossette, Lezhou Feng, Lingting Ge, Sandro Papais","cross_cats":["cs.RO"],"headline":"SToRe3D prunes ViT tokens and 3D queries via mutual relevance heads to reach 3x faster multi-view detection with only marginal accuracy loss.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T20:53:03Z","title":"SToRe3D: Sparse Token Relevance in ViTs for Efficient Multi-View 3D Object Detection"},"references":{"count":71,"internal_anchors":9,"resolved_work":71,"sample":[{"cited_arxiv_id":"2305.13245","doi":"","is_internal_anchor":true,"ref_index":1,"title":"GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints","work_id":"b73ad5b2-e553-4c71-b0c9-67e67ba7b158","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Token merging: Your vit but faster","work_id":"3a41079e-d522-4559-bbe1-f4f3c9708895","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"nuscenes: A multi- modal dataset for autonomous driving","work_id":"ce2f77fd-82cb-445f-8a60-028a56ad7c24","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"End- to-end object detection with transformers","work_id":"a8aa5f17-b506-45d3-9e8f-dcb5190a8ea8","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Pointbev: A sparse approach for bev predictions","work_id":"1e32ede3-ce91-499d-8f38-5885efa021b8","year":2024}],"snapshot_sha256":"9066b5a6c8688609fca94c3045102434600ff1deba7215c02a88274558f36dba"},"source":{"id":"2605.14110","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T05:11:13.393094Z","id":"a9a451d6-b6dc-4ae1-b351-44cceea859d8","model_set":{"reader":"grok-4.3"},"one_line_summary":"SToRe3D delivers up to 3x faster inference for multi-view 3D object detection in ViTs by selecting relevant 2D tokens and 3D queries via mutual relevance heads with only marginal accuracy loss.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"SToRe3D prunes ViT tokens and 3D queries via mutual relevance heads to reach 3x faster multi-view detection with only marginal accuracy loss.","strongest_claim":"SToRe3D achieves up to 3x faster inference with marginal accuracy loss, establishing real-time large-scale ViT-based 3D detection while maintaining accuracy on planning-critical agents.","weakest_assumption":"That the mutual 2D-3D relevance heads reliably identify driving-critical content and that storing filtered features does not introduce unacceptable reactivation overhead or accuracy degradation under varying scene conditions."}},"verdict_id":"a9a451d6-b6dc-4ae1-b351-44cceea859d8"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b7a86a19f00ecdfedc82db83edffde44f60bbf408b6b332fab51136cbf1de6a8","target":"record","created_at":"2026-05-17T23:39:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c737199c220c3db37617317a9be54b30b0602e2b84612c2aba9b7309bec89bf6","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T20:53:03Z","title_canon_sha256":"ec4506124253a3de3252722be03a18ffc73bbfe1d7525492bb96a4e8bd936298"},"schema_version":"1.0","source":{"id":"2605.14110","kind":"arxiv","version":1}},"canonical_sha256":"4627af92ca6bd835a2e9c8f99daaa7ec0b6ec5e0195d9c1215f197c35a9e6ff0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4627af92ca6bd835a2e9c8f99daaa7ec0b6ec5e0195d9c1215f197c35a9e6ff0","first_computed_at":"2026-05-17T23:39:12.011011Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:12.011011Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"efrH3niQK41CRWoMQjmJscGKDvvv5kILExQSpR2R/DMKbvXUkk8UQoqGj2pVOpr6ZhotiXx/KlXW1Z6+HznKAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:12.011852Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14110","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b7a86a19f00ecdfedc82db83edffde44f60bbf408b6b332fab51136cbf1de6a8","sha256:6cf98e063340b87d2d91a38cd00a8985b4ab88b010d9323237df0065fa167147"],"state_sha256":"193fc64fcbb940fdff3618a91458588676da522fa2a9c9239de0dc356bed77d9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zrOo/c1cJQ3MJZkronrvoDLOH9po27D/os+lrrs5UkeGpcqHvunLGsFsCdx7C3++gOwjDBzAYzesq/i/aCuXBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T17:53:30.088905Z","bundle_sha256":"a5fd2bf24dfad6b8f7142a79e6b46158e410161e4f61cf3e4318335e95fc662f"}}