{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:OCQBNANCOTVTBGIXRKP2UKSARB","short_pith_number":"pith:OCQBNANC","canonical_record":{"source":{"id":"1704.04497","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-04-14T17:57:01Z","cross_cats_sorted":[],"title_canon_sha256":"71021d09c36663ded8343f8660d0111d6adc5f292900ba41b16dd4c76ba6d873","abstract_canon_sha256":"20989befbf620ae4bac7df212002d486a50d1e2ad3a20569522d9799b423ebc3"},"schema_version":"1.0"},"canonical_sha256":"70a01681a274eb3099178a9faa2a40885598105f64f4ffc8d37435fed875c608","source":{"kind":"arxiv","id":"1704.04497","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1704.04497","created_at":"2026-05-18T00:29:01Z"},{"alias_kind":"arxiv_version","alias_value":"1704.04497v3","created_at":"2026-05-18T00:29:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.04497","created_at":"2026-05-18T00:29:01Z"},{"alias_kind":"pith_short_12","alias_value":"OCQBNANCOTVT","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OCQBNANCOTVTBGIX","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OCQBNANC","created_at":"2026-05-18T12:31:34Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:OCQBNANCOTVTBGIXRKP2UKSARB","target":"record","payload":{"canonical_record":{"source":{"id":"1704.04497","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-04-14T17:57:01Z","cross_cats_sorted":[],"title_canon_sha256":"71021d09c36663ded8343f8660d0111d6adc5f292900ba41b16dd4c76ba6d873","abstract_canon_sha256":"20989befbf620ae4bac7df212002d486a50d1e2ad3a20569522d9799b423ebc3"},"schema_version":"1.0"},"canonical_sha256":"70a01681a274eb3099178a9faa2a40885598105f64f4ffc8d37435fed875c608","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:29:01.538440Z","signature_b64":"5l3lmNTqYpDwxLVJQjKdoPH62t0Q3TqqXGE6unEXgoP/zy5mUq2Rbc/ilXsr6+uposGXsNNs3NnnnSmk1PuQAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"70a01681a274eb3099178a9faa2a40885598105f64f4ffc8d37435fed875c608","last_reissued_at":"2026-05-18T00:29:01.538030Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:29:01.538030Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1704.04497","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:29:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4Z+btOQQ9IB6OWc78PZAX36GJsAplpGMMfIAxrbeix+at15TcuFPYiiJ2rDceUV0SgFaXz2IPk0d5lm27xmhCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-21T14:47:41.718872Z"},"content_sha256":"98746d7884e31298f0ad3ba0ed90aff5e4eca2bdbcd28b8b4bddf15446b01312","schema_version":"1.0","event_id":"sha256:98746d7884e31298f0ad3ba0ed90aff5e4eca2bdbcd28b8b4bddf15446b01312"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:OCQBNANCOTVTBGIXRKP2UKSARB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"TGIF-QA: Toward Spatio-Temporal Reasoning in Visual Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Gunhee Kim, Yale Song, Youngjae Yu, Youngjin Kim, Yunseok Jang","submitted_at":"2017-04-14T17:57:01Z","abstract_excerpt":"Vision and language understanding has emerged as a subject undergoing intense study in Artificial Intelligence. Among many tasks in this line of research, visual question answering (VQA) has been one of the most successful ones, where the goal is to learn a model that understands visual content at region-level details and finds their associations with pairs of questions and answers in the natural language form. Despite the rapid progress in the past few years, most existing work in VQA have focused primarily on images. In this paper, we focus on extending VQA to the video domain and contribute"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.04497","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:29:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LGoQSRI1IiHghr24J214qskLpkLKsGyAVqm1SuSVE26BtuHZmPUCPooOhTp+WIJ56Bh+l04vbNBMtiRrOeWYDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-21T14:47:41.719217Z"},"content_sha256":"dc21201df11c4774e980fb37878d497ffac94ccf67d82b9d65faa77bbc8e1bf1","schema_version":"1.0","event_id":"sha256:dc21201df11c4774e980fb37878d497ffac94ccf67d82b9d65faa77bbc8e1bf1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OCQBNANCOTVTBGIXRKP2UKSARB/bundle.json","state_url":"https://pith.science/pith/OCQBNANCOTVTBGIXRKP2UKSARB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OCQBNANCOTVTBGIXRKP2UKSARB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-21T14:47:41Z","links":{"resolver":"https://pith.science/pith/OCQBNANCOTVTBGIXRKP2UKSARB","bundle":"https://pith.science/pith/OCQBNANCOTVTBGIXRKP2UKSARB/bundle.json","state":"https://pith.science/pith/OCQBNANCOTVTBGIXRKP2UKSARB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OCQBNANCOTVTBGIXRKP2UKSARB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:OCQBNANCOTVTBGIXRKP2UKSARB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"20989befbf620ae4bac7df212002d486a50d1e2ad3a20569522d9799b423ebc3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-04-14T17:57:01Z","title_canon_sha256":"71021d09c36663ded8343f8660d0111d6adc5f292900ba41b16dd4c76ba6d873"},"schema_version":"1.0","source":{"id":"1704.04497","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1704.04497","created_at":"2026-05-18T00:29:01Z"},{"alias_kind":"arxiv_version","alias_value":"1704.04497v3","created_at":"2026-05-18T00:29:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.04497","created_at":"2026-05-18T00:29:01Z"},{"alias_kind":"pith_short_12","alias_value":"OCQBNANCOTVT","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OCQBNANCOTVTBGIX","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OCQBNANC","created_at":"2026-05-18T12:31:34Z"}],"graph_snapshots":[{"event_id":"sha256:dc21201df11c4774e980fb37878d497ffac94ccf67d82b9d65faa77bbc8e1bf1","target":"graph","created_at":"2026-05-18T00:29:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Vision and language understanding has emerged as a subject undergoing intense study in Artificial Intelligence. Among many tasks in this line of research, visual question answering (VQA) has been one of the most successful ones, where the goal is to learn a model that understands visual content at region-level details and finds their associations with pairs of questions and answers in the natural language form. Despite the rapid progress in the past few years, most existing work in VQA have focused primarily on images. In this paper, we focus on extending VQA to the video domain and contribute","authors_text":"Gunhee Kim, Yale Song, Youngjae Yu, Youngjin Kim, Yunseok Jang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-04-14T17:57:01Z","title":"TGIF-QA: Toward Spatio-Temporal Reasoning in Visual Question Answering"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.04497","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:98746d7884e31298f0ad3ba0ed90aff5e4eca2bdbcd28b8b4bddf15446b01312","target":"record","created_at":"2026-05-18T00:29:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"20989befbf620ae4bac7df212002d486a50d1e2ad3a20569522d9799b423ebc3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-04-14T17:57:01Z","title_canon_sha256":"71021d09c36663ded8343f8660d0111d6adc5f292900ba41b16dd4c76ba6d873"},"schema_version":"1.0","source":{"id":"1704.04497","kind":"arxiv","version":3}},"canonical_sha256":"70a01681a274eb3099178a9faa2a40885598105f64f4ffc8d37435fed875c608","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"70a01681a274eb3099178a9faa2a40885598105f64f4ffc8d37435fed875c608","first_computed_at":"2026-05-18T00:29:01.538030Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:29:01.538030Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5l3lmNTqYpDwxLVJQjKdoPH62t0Q3TqqXGE6unEXgoP/zy5mUq2Rbc/ilXsr6+uposGXsNNs3NnnnSmk1PuQAQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:29:01.538440Z","signed_message":"canonical_sha256_bytes"},"source_id":"1704.04497","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:98746d7884e31298f0ad3ba0ed90aff5e4eca2bdbcd28b8b4bddf15446b01312","sha256:dc21201df11c4774e980fb37878d497ffac94ccf67d82b9d65faa77bbc8e1bf1"],"state_sha256":"5488c3e5465bab0c124d0373faa78a2dc6d905700bc62122aeed4d192a50e17a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DQzAKVVK6Ct/Y0ZITQXWUiPA1TMvxIe6e9gvtUN0jjmtTi5DgFodVAm03bBUsEko/S1dSzNQVQ/ZwosUrx0kCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-21T14:47:41.721192Z","bundle_sha256":"09bb36340f8f7e7192fe18b7083ca0bf766403be517f0a495309381479060c9a"}}