{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:BAC4VYXIBH3JROW3RWU6I3Z2T3","short_pith_number":"pith:BAC4VYXI","canonical_record":{"source":{"id":"2606.24602","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T14:03:56Z","cross_cats_sorted":[],"title_canon_sha256":"6b4e3b9d070d0c720002decffbd71d7cca355b80a2569cc805d493654b86281b","abstract_canon_sha256":"0098f706eff013095d929c5e9bc75152c1ac10e32d221419df2debdd67d934dc"},"schema_version":"1.0"},"canonical_sha256":"0805cae2e809f698badb8da9e46f3a9ee6d9a05c00151baf5713898059749052","source":{"kind":"arxiv","id":"2606.24602","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24602","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24602v1","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24602","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_12","alias_value":"BAC4VYXIBH3J","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_16","alias_value":"BAC4VYXIBH3JROW3","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_8","alias_value":"BAC4VYXI","created_at":"2026-06-24T01:15:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:BAC4VYXIBH3JROW3RWU6I3Z2T3","target":"record","payload":{"canonical_record":{"source":{"id":"2606.24602","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T14:03:56Z","cross_cats_sorted":[],"title_canon_sha256":"6b4e3b9d070d0c720002decffbd71d7cca355b80a2569cc805d493654b86281b","abstract_canon_sha256":"0098f706eff013095d929c5e9bc75152c1ac10e32d221419df2debdd67d934dc"},"schema_version":"1.0"},"canonical_sha256":"0805cae2e809f698badb8da9e46f3a9ee6d9a05c00151baf5713898059749052","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:15:36.736024Z","signature_b64":"pq/iIvyTrlu+czwMcIg+y1TZFc0PbNeuGzji8jYo6+3SO8c/6Kk3F7enUU3kRH8MlxjNTLjWuYeHOzoITOL3Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0805cae2e809f698badb8da9e46f3a9ee6d9a05c00151baf5713898059749052","last_reissued_at":"2026-06-24T01:15:36.735646Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:15:36.735646Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.24602","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pzYNgY/KDyNUr1uOdrkjnxjngAyJhKE40oBSNd+btP3ah2GfnOLLG6aSBO6l5q+oZFdgPdwgyyUIq0py4SfKDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T14:37:26.628953Z"},"content_sha256":"88b54aba395e88a45f5a40f3c9fcff6f1ee0fe5de8d7a35a7c20c6a771a8e585","schema_version":"1.0","event_id":"sha256:88b54aba395e88a45f5a40f3c9fcff6f1ee0fe5de8d7a35a7c20c6a771a8e585"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:BAC4VYXIBH3JROW3RWU6I3Z2T3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ViTexQA: A Multi-Frame Temporal Perception Dataset for Video Text Question Answering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chen Duan, Kai Zhou, Pengfei Yan, Tongkun Guan, Zhentao Guo, Zining Wang","submitted_at":"2026-06-23T14:03:56Z","abstract_excerpt":"Despite remarkable progress in multimodal understanding, current MLLMs still exhibit limitations in video text understanding, particularly when semantics emerge through the integration of temporally distributed textual cues across multiple frames. This perception challenge fundamentally differs from static image text understanding, yet existing datasets fail to capture: the vast majority of questions remain answerable from single frames, inadequately reflecting real-world video text comprehension demands. To address this, we present ViTexQA, a large-scale video-text QA dataset, and FrameThinke"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24602","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.24602/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6XsAB/sEpyeqc4zVQ7tmGNRoZysWG5j1kEn8vb39A1vWOxWp42/P143MPpsJXsR4BeTksWf4eiknMSafNcj+Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T14:37:26.629339Z"},"content_sha256":"a50134b9520480890e1e286717862952229ce65d3d1f6f99abf49f9587d8e582","schema_version":"1.0","event_id":"sha256:a50134b9520480890e1e286717862952229ce65d3d1f6f99abf49f9587d8e582"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BAC4VYXIBH3JROW3RWU6I3Z2T3/bundle.json","state_url":"https://pith.science/pith/BAC4VYXIBH3JROW3RWU6I3Z2T3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BAC4VYXIBH3JROW3RWU6I3Z2T3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T14:37:26Z","links":{"resolver":"https://pith.science/pith/BAC4VYXIBH3JROW3RWU6I3Z2T3","bundle":"https://pith.science/pith/BAC4VYXIBH3JROW3RWU6I3Z2T3/bundle.json","state":"https://pith.science/pith/BAC4VYXIBH3JROW3RWU6I3Z2T3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BAC4VYXIBH3JROW3RWU6I3Z2T3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:BAC4VYXIBH3JROW3RWU6I3Z2T3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0098f706eff013095d929c5e9bc75152c1ac10e32d221419df2debdd67d934dc","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T14:03:56Z","title_canon_sha256":"6b4e3b9d070d0c720002decffbd71d7cca355b80a2569cc805d493654b86281b"},"schema_version":"1.0","source":{"id":"2606.24602","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24602","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24602v1","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24602","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_12","alias_value":"BAC4VYXIBH3J","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_16","alias_value":"BAC4VYXIBH3JROW3","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_8","alias_value":"BAC4VYXI","created_at":"2026-06-24T01:15:36Z"}],"graph_snapshots":[{"event_id":"sha256:a50134b9520480890e1e286717862952229ce65d3d1f6f99abf49f9587d8e582","target":"graph","created_at":"2026-06-24T01:15:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.24602/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Despite remarkable progress in multimodal understanding, current MLLMs still exhibit limitations in video text understanding, particularly when semantics emerge through the integration of temporally distributed textual cues across multiple frames. This perception challenge fundamentally differs from static image text understanding, yet existing datasets fail to capture: the vast majority of questions remain answerable from single frames, inadequately reflecting real-world video text comprehension demands. To address this, we present ViTexQA, a large-scale video-text QA dataset, and FrameThinke","authors_text":"Chen Duan, Kai Zhou, Pengfei Yan, Tongkun Guan, Zhentao Guo, Zining Wang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T14:03:56Z","title":"ViTexQA: A Multi-Frame Temporal Perception Dataset for Video Text Question Answering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24602","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:88b54aba395e88a45f5a40f3c9fcff6f1ee0fe5de8d7a35a7c20c6a771a8e585","target":"record","created_at":"2026-06-24T01:15:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0098f706eff013095d929c5e9bc75152c1ac10e32d221419df2debdd67d934dc","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T14:03:56Z","title_canon_sha256":"6b4e3b9d070d0c720002decffbd71d7cca355b80a2569cc805d493654b86281b"},"schema_version":"1.0","source":{"id":"2606.24602","kind":"arxiv","version":1}},"canonical_sha256":"0805cae2e809f698badb8da9e46f3a9ee6d9a05c00151baf5713898059749052","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0805cae2e809f698badb8da9e46f3a9ee6d9a05c00151baf5713898059749052","first_computed_at":"2026-06-24T01:15:36.735646Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T01:15:36.735646Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"pq/iIvyTrlu+czwMcIg+y1TZFc0PbNeuGzji8jYo6+3SO8c/6Kk3F7enUU3kRH8MlxjNTLjWuYeHOzoITOL3Dw==","signature_status":"signed_v1","signed_at":"2026-06-24T01:15:36.736024Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.24602","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:88b54aba395e88a45f5a40f3c9fcff6f1ee0fe5de8d7a35a7c20c6a771a8e585","sha256:a50134b9520480890e1e286717862952229ce65d3d1f6f99abf49f9587d8e582"],"state_sha256":"ea28e06c54ce34aafea67a842077b804cc967508c7ee8d0cfbf06f332831b442"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"C5Kt04WhhqBpI3nB3kDG4ulMGf/sVusH+ZXdu8UeahD18ATqMuSoHlq3KEerTS8657Oy0RGI+6IJSpsm7sSsDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T14:37:26.631306Z","bundle_sha256":"1cc9fd8bc954397d4faf73cbd0e27f26e949f28cd15f6737f1e16c08e9dc3355"}}