{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:IJQWRPLDEJHC55NSYIVAN4YJEJ","short_pith_number":"pith:IJQWRPLD","canonical_record":{"source":{"id":"1812.00303","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-12-02T02:08:02Z","cross_cats_sorted":[],"title_canon_sha256":"b2b9d8ce268f1a67aaeac60e50479a80a3fbeec64357850470ce820b2c3e4623","abstract_canon_sha256":"3256a3e14f266831be12b629fd52b1a95076e1d7a5560270d078e990c98a4a60"},"schema_version":"1.0"},"canonical_sha256":"426168bd63224e2ef5b2c22a06f309225d37842367f091caeb5b564ace39f302","source":{"kind":"arxiv","id":"1812.00303","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.00303","created_at":"2026-05-17T23:59:21Z"},{"alias_kind":"arxiv_version","alias_value":"1812.00303v1","created_at":"2026-05-17T23:59:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.00303","created_at":"2026-05-17T23:59:21Z"},{"alias_kind":"pith_short_12","alias_value":"IJQWRPLDEJHC","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"IJQWRPLDEJHC55NS","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"IJQWRPLD","created_at":"2026-05-18T12:32:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:IJQWRPLDEJHC55NSYIVAN4YJEJ","target":"record","payload":{"canonical_record":{"source":{"id":"1812.00303","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-12-02T02:08:02Z","cross_cats_sorted":[],"title_canon_sha256":"b2b9d8ce268f1a67aaeac60e50479a80a3fbeec64357850470ce820b2c3e4623","abstract_canon_sha256":"3256a3e14f266831be12b629fd52b1a95076e1d7a5560270d078e990c98a4a60"},"schema_version":"1.0"},"canonical_sha256":"426168bd63224e2ef5b2c22a06f309225d37842367f091caeb5b564ace39f302","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:21.651949Z","signature_b64":"/rr/l7zjU1ToCUCuy+E7VYJmwtM8xsDwq5KNoieS8I05XJOD6cnek655Cr3nMWeNuICHal4HzlVG/iEVOr/LBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"426168bd63224e2ef5b2c22a06f309225d37842367f091caeb5b564ace39f302","last_reissued_at":"2026-05-17T23:59:21.651444Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:21.651444Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1812.00303","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"k4TcYum6hVKnOtXLMarzwq+ERx3R7g+7cbOfLX9xX6BLM8GyQTJ+mhmB1zBP9Zvu+PBnueMoEH0UfV17JlSVBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T14:26:31.752324Z"},"content_sha256":"0fc6a85c812a8b383f6fb0cda585d9e7abf53553373f4b480217ff93822c53a3","schema_version":"1.0","event_id":"sha256:0fc6a85c812a8b383f6fb0cda585d9e7abf53553373f4b480217ff93822c53a3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:IJQWRPLDEJHC55NSYIVAN4YJEJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Multi-modal Capsule Routing for Actor and Action Video Segmentation Conditioned on Natural Language Queries","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Bruce McIntosh, Kevin Duarte, Mubarak Shah, Yogesh S Rawat","submitted_at":"2018-12-02T02:08:02Z","abstract_excerpt":"In this paper, we propose an end-to-end capsule network for pixel level localization of actors and actions present in a video. The localization is performed based on a natural language query through which an actor and action are specified. We propose to encode both the video as well as textual input in the form of capsules, which provide more effective representation in comparison with standard convolution based features. We introduce a novel capsule based attention mechanism for fusion of video and text capsules for text selected video segmentation. The attention mechanism is performed via jo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.00303","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:59:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OFvnRtHX0/ZyzVACLqxoJdifOUoruS9n3j22ynL+GKAKKCXbLbQxqGu3AEbnMOVx8GEPCM3NK7azDxk29laZAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T14:26:31.752680Z"},"content_sha256":"379ffdd939fd7b866ddddc6bd0515d3b6465bf8514abec3d66d202a4887e7a24","schema_version":"1.0","event_id":"sha256:379ffdd939fd7b866ddddc6bd0515d3b6465bf8514abec3d66d202a4887e7a24"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IJQWRPLDEJHC55NSYIVAN4YJEJ/bundle.json","state_url":"https://pith.science/pith/IJQWRPLDEJHC55NSYIVAN4YJEJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IJQWRPLDEJHC55NSYIVAN4YJEJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T14:26:31Z","links":{"resolver":"https://pith.science/pith/IJQWRPLDEJHC55NSYIVAN4YJEJ","bundle":"https://pith.science/pith/IJQWRPLDEJHC55NSYIVAN4YJEJ/bundle.json","state":"https://pith.science/pith/IJQWRPLDEJHC55NSYIVAN4YJEJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IJQWRPLDEJHC55NSYIVAN4YJEJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:IJQWRPLDEJHC55NSYIVAN4YJEJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3256a3e14f266831be12b629fd52b1a95076e1d7a5560270d078e990c98a4a60","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-12-02T02:08:02Z","title_canon_sha256":"b2b9d8ce268f1a67aaeac60e50479a80a3fbeec64357850470ce820b2c3e4623"},"schema_version":"1.0","source":{"id":"1812.00303","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1812.00303","created_at":"2026-05-17T23:59:21Z"},{"alias_kind":"arxiv_version","alias_value":"1812.00303v1","created_at":"2026-05-17T23:59:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.00303","created_at":"2026-05-17T23:59:21Z"},{"alias_kind":"pith_short_12","alias_value":"IJQWRPLDEJHC","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_16","alias_value":"IJQWRPLDEJHC55NS","created_at":"2026-05-18T12:32:31Z"},{"alias_kind":"pith_short_8","alias_value":"IJQWRPLD","created_at":"2026-05-18T12:32:31Z"}],"graph_snapshots":[{"event_id":"sha256:379ffdd939fd7b866ddddc6bd0515d3b6465bf8514abec3d66d202a4887e7a24","target":"graph","created_at":"2026-05-17T23:59:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this paper, we propose an end-to-end capsule network for pixel level localization of actors and actions present in a video. The localization is performed based on a natural language query through which an actor and action are specified. We propose to encode both the video as well as textual input in the form of capsules, which provide more effective representation in comparison with standard convolution based features. We introduce a novel capsule based attention mechanism for fusion of video and text capsules for text selected video segmentation. The attention mechanism is performed via jo","authors_text":"Bruce McIntosh, Kevin Duarte, Mubarak Shah, Yogesh S Rawat","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-12-02T02:08:02Z","title":"Multi-modal Capsule Routing for Actor and Action Video Segmentation Conditioned on Natural Language Queries"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.00303","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0fc6a85c812a8b383f6fb0cda585d9e7abf53553373f4b480217ff93822c53a3","target":"record","created_at":"2026-05-17T23:59:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3256a3e14f266831be12b629fd52b1a95076e1d7a5560270d078e990c98a4a60","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-12-02T02:08:02Z","title_canon_sha256":"b2b9d8ce268f1a67aaeac60e50479a80a3fbeec64357850470ce820b2c3e4623"},"schema_version":"1.0","source":{"id":"1812.00303","kind":"arxiv","version":1}},"canonical_sha256":"426168bd63224e2ef5b2c22a06f309225d37842367f091caeb5b564ace39f302","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"426168bd63224e2ef5b2c22a06f309225d37842367f091caeb5b564ace39f302","first_computed_at":"2026-05-17T23:59:21.651444Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:59:21.651444Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/rr/l7zjU1ToCUCuy+E7VYJmwtM8xsDwq5KNoieS8I05XJOD6cnek655Cr3nMWeNuICHal4HzlVG/iEVOr/LBQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:59:21.651949Z","signed_message":"canonical_sha256_bytes"},"source_id":"1812.00303","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0fc6a85c812a8b383f6fb0cda585d9e7abf53553373f4b480217ff93822c53a3","sha256:379ffdd939fd7b866ddddc6bd0515d3b6465bf8514abec3d66d202a4887e7a24"],"state_sha256":"f251fcd140121444b77d5c6d61b229160bf8dcdac7b9ca99d7c3a99c9799ffc5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"itJ8kzzzFju6oiqtvqs5PxIqyLGunfTuX29VDJgWw0NACC53xeUXloZTNH1OeUkothaVncO5XQ63gSko687hAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T14:26:31.754591Z","bundle_sha256":"521b96fe4e0ce6b9d7a27d19c978bcefb0ab486a2e33320daf0b3ca82b76f473"}}