{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:WLOGG3E5KB6WWDRP2DSJLWHNNC","short_pith_number":"pith:WLOGG3E5","canonical_record":{"source":{"id":"2605.24624","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-23T15:30:19Z","cross_cats_sorted":[],"title_canon_sha256":"00d1c997f472f437d5b775ec22f1aa21b1531d19db26fc5255be7b65deeac0ab","abstract_canon_sha256":"094c17c1e96c2339011e4d7ab6357e796bb6b48673a1470ec1ec04552cfcf6bb"},"schema_version":"1.0"},"canonical_sha256":"b2dc636c9d507d6b0e2fd0e495d8ed68b49c8cff25b96927a9260e3d7a1928bd","source":{"kind":"arxiv","id":"2605.24624","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.24624","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.24624v1","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24624","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"pith_short_12","alias_value":"WLOGG3E5KB6W","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"pith_short_16","alias_value":"WLOGG3E5KB6WWDRP","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"pith_short_8","alias_value":"WLOGG3E5","created_at":"2026-05-26T01:03:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:WLOGG3E5KB6WWDRP2DSJLWHNNC","target":"record","payload":{"canonical_record":{"source":{"id":"2605.24624","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-23T15:30:19Z","cross_cats_sorted":[],"title_canon_sha256":"00d1c997f472f437d5b775ec22f1aa21b1531d19db26fc5255be7b65deeac0ab","abstract_canon_sha256":"094c17c1e96c2339011e4d7ab6357e796bb6b48673a1470ec1ec04552cfcf6bb"},"schema_version":"1.0"},"canonical_sha256":"b2dc636c9d507d6b0e2fd0e495d8ed68b49c8cff25b96927a9260e3d7a1928bd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:49.517455Z","signature_b64":"bcmbGOA5SspMnyix4kpeMt6tKiD6JS5+dXARpKI2zzeAutb7pbjA00quzbMbEroSV7dX88XINfnYxg1nAH97AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b2dc636c9d507d6b0e2fd0e495d8ed68b49c8cff25b96927a9260e3d7a1928bd","last_reissued_at":"2026-05-26T01:03:49.516522Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:49.516522Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.24624","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ucGiCuOsHNjRIS6ms84ydf4JBUcpjof9iB44chNZ5/IussJYfmqhx1PvkhBz99pUmbKZ/XgOrRTQOdM25DgeBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T18:12:21.056680Z"},"content_sha256":"532e92fe7fe8552b7250b214dfcfa6f280543f22e3e5a6854623e7d5f4981b88","schema_version":"1.0","event_id":"sha256:532e92fe7fe8552b7250b214dfcfa6f280543f22e3e5a6854623e7d5f4981b88"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:WLOGG3E5KB6WWDRP2DSJLWHNNC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Vision-Language Binding in In-Context Image Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Antonio Torralba, Chris Ge, Rohit Gandikota, Tamar Rott Shaham","submitted_at":"2026-05-23T15:30:19Z","abstract_excerpt":"In-context image generation models such as FLUX.2 take a text prompt and an optional reference image as visual conditioning for the output. Internally, all three inputs -- text, reference image, and the noise tokens -- are concatenated and processed through a single attention stream, where all tokens can attend to one another. This leaves open how reference information flows through the model to produce the output image. We show that an implicit cross-modal binding emerges between the text tokens and the reference image: the text tokens absorb visual reference content during the forward pass, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24624","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.24624/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MvZ5t0OtD24ykheYEICLq5vR8IgC7kFKihQEQOfoaPtUy4DMPt8/TDyTE0lnEkfd3tMpR/wtk7odL+JPd6NGDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T18:12:21.057047Z"},"content_sha256":"f219e04072651d0443c86e06b88a75a83c2e29b5709f547c973a2af49cf3a47e","schema_version":"1.0","event_id":"sha256:f219e04072651d0443c86e06b88a75a83c2e29b5709f547c973a2af49cf3a47e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WLOGG3E5KB6WWDRP2DSJLWHNNC/bundle.json","state_url":"https://pith.science/pith/WLOGG3E5KB6WWDRP2DSJLWHNNC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WLOGG3E5KB6WWDRP2DSJLWHNNC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-03T18:12:21Z","links":{"resolver":"https://pith.science/pith/WLOGG3E5KB6WWDRP2DSJLWHNNC","bundle":"https://pith.science/pith/WLOGG3E5KB6WWDRP2DSJLWHNNC/bundle.json","state":"https://pith.science/pith/WLOGG3E5KB6WWDRP2DSJLWHNNC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WLOGG3E5KB6WWDRP2DSJLWHNNC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:WLOGG3E5KB6WWDRP2DSJLWHNNC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"094c17c1e96c2339011e4d7ab6357e796bb6b48673a1470ec1ec04552cfcf6bb","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-23T15:30:19Z","title_canon_sha256":"00d1c997f472f437d5b775ec22f1aa21b1531d19db26fc5255be7b65deeac0ab"},"schema_version":"1.0","source":{"id":"2605.24624","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.24624","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.24624v1","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24624","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"pith_short_12","alias_value":"WLOGG3E5KB6W","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"pith_short_16","alias_value":"WLOGG3E5KB6WWDRP","created_at":"2026-05-26T01:03:49Z"},{"alias_kind":"pith_short_8","alias_value":"WLOGG3E5","created_at":"2026-05-26T01:03:49Z"}],"graph_snapshots":[{"event_id":"sha256:f219e04072651d0443c86e06b88a75a83c2e29b5709f547c973a2af49cf3a47e","target":"graph","created_at":"2026-05-26T01:03:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.24624/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"In-context image generation models such as FLUX.2 take a text prompt and an optional reference image as visual conditioning for the output. Internally, all three inputs -- text, reference image, and the noise tokens -- are concatenated and processed through a single attention stream, where all tokens can attend to one another. This leaves open how reference information flows through the model to produce the output image. We show that an implicit cross-modal binding emerges between the text tokens and the reference image: the text tokens absorb visual reference content during the forward pass, ","authors_text":"Antonio Torralba, Chris Ge, Rohit Gandikota, Tamar Rott Shaham","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-23T15:30:19Z","title":"Vision-Language Binding in In-Context Image Generation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24624","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:532e92fe7fe8552b7250b214dfcfa6f280543f22e3e5a6854623e7d5f4981b88","target":"record","created_at":"2026-05-26T01:03:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"094c17c1e96c2339011e4d7ab6357e796bb6b48673a1470ec1ec04552cfcf6bb","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-23T15:30:19Z","title_canon_sha256":"00d1c997f472f437d5b775ec22f1aa21b1531d19db26fc5255be7b65deeac0ab"},"schema_version":"1.0","source":{"id":"2605.24624","kind":"arxiv","version":1}},"canonical_sha256":"b2dc636c9d507d6b0e2fd0e495d8ed68b49c8cff25b96927a9260e3d7a1928bd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b2dc636c9d507d6b0e2fd0e495d8ed68b49c8cff25b96927a9260e3d7a1928bd","first_computed_at":"2026-05-26T01:03:49.516522Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T01:03:49.516522Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bcmbGOA5SspMnyix4kpeMt6tKiD6JS5+dXARpKI2zzeAutb7pbjA00quzbMbEroSV7dX88XINfnYxg1nAH97AQ==","signature_status":"signed_v1","signed_at":"2026-05-26T01:03:49.517455Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.24624","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:532e92fe7fe8552b7250b214dfcfa6f280543f22e3e5a6854623e7d5f4981b88","sha256:f219e04072651d0443c86e06b88a75a83c2e29b5709f547c973a2af49cf3a47e"],"state_sha256":"7bc70eef44a2af28e1af9bebdc8c76813bf09b73a290dc0c3e4838bfe857643e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7R2/Ss4Jc5dQSSew0bkk2MFV+BVauZGe8dCRzOLECMkaAb2DGtj/bDliUbFGr+5P8RtVe+e/NvxYcICrSHkOCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-03T18:12:21.059009Z","bundle_sha256":"c5ba1e2b19bbab52ca81f972ecb7d29e78c80483361444e5136fd73075d2d55e"}}