{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:T2GB2AZRYNHCA4EWTC24WTMZC5","short_pith_number":"pith:T2GB2AZR","canonical_record":{"source":{"id":"2606.19684","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-18T01:19:32Z","cross_cats_sorted":[],"title_canon_sha256":"123928a085b5cddd086ec3a3eaa9fe11177759e0dfb4a489774cd9935b7d603b","abstract_canon_sha256":"0e9ff83ba45b44639477a9937a16e6849187fdd7cb9bf76388f2f4819a276ec9"},"schema_version":"1.0"},"canonical_sha256":"9e8c1d0331c34e20709698b5cb4d99174ae7c56e9394122fd9e84b981a03f1e9","source":{"kind":"arxiv","id":"2606.19684","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.19684","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"arxiv_version","alias_value":"2606.19684v1","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19684","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_12","alias_value":"T2GB2AZRYNHC","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_16","alias_value":"T2GB2AZRYNHCA4EW","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_8","alias_value":"T2GB2AZR","created_at":"2026-06-19T16:12:32Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:T2GB2AZRYNHCA4EWTC24WTMZC5","target":"record","payload":{"canonical_record":{"source":{"id":"2606.19684","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-18T01:19:32Z","cross_cats_sorted":[],"title_canon_sha256":"123928a085b5cddd086ec3a3eaa9fe11177759e0dfb4a489774cd9935b7d603b","abstract_canon_sha256":"0e9ff83ba45b44639477a9937a16e6849187fdd7cb9bf76388f2f4819a276ec9"},"schema_version":"1.0"},"canonical_sha256":"9e8c1d0331c34e20709698b5cb4d99174ae7c56e9394122fd9e84b981a03f1e9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:32.135048Z","signature_b64":"hgtfOPVNxoZaOPwL6oatyFjsDXHu6tTApK4CrAPmDT9XXBEfzkuzJLBQoqON+BtHGXK3m8VXW8N9q3yP244UCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9e8c1d0331c34e20709698b5cb4d99174ae7c56e9394122fd9e84b981a03f1e9","last_reissued_at":"2026-06-19T16:12:32.134689Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:32.134689Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.19684","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:12:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YRKauQzqwGv20Q+6On64oRGZkUNIcVuH3EXp3Y5VzzcrZRWhO9RmC47raDbfKjpB9QuvIh4Bpsdwv25rhrHGAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T21:01:13.688464Z"},"content_sha256":"dcd8f58de6721a4c559998899036a48c77280d9d1f3c0ec045b1097f4a73f459","schema_version":"1.0","event_id":"sha256:dcd8f58de6721a4c559998899036a48c77280d9d1f3c0ec045b1097f4a73f459"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:T2GB2AZRYNHCA4EWTC24WTMZC5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Exploring Multi-Modal Large Language Models and Two-Stage Fine-Tuning for Fashion Image Retrieval","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Hoang Bui Le, Nam Vo Hoang, Nguyen Cao Hoang, Trung-Nghia Le","submitted_at":"2026-06-18T01:19:32Z","abstract_excerpt":"Composed image retrieval retrieves a target image using a composed query of a reference image and a modified text description. In the fashion domain, this task requires understanding subtle attribute variations such as color, pattern, and texture. However, existing approaches face limitations due to scarce annotated data and simplistic negative sampling. We propose a novel framework that integrates a multi-modal large language model (LLaVA) to generate attribute-aware triplets and introduces a two-stage fine-tuning strategy to enhance contrastive learning. We leverage pretrained vision-languag"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19684","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.19684/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:12:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xdvZse/YvMnIw5KyH0d1oquDu33J1HBWvMUFqtzyGbQs1wfzxVoAJwHIp7v1ifz6gOc0HSB4KUaD3g3PdWg8CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T21:01:13.688837Z"},"content_sha256":"8702c17ed5e26e742e449bcc749afabe475e1919002ec362de8cb6356cc46a94","schema_version":"1.0","event_id":"sha256:8702c17ed5e26e742e449bcc749afabe475e1919002ec362de8cb6356cc46a94"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/T2GB2AZRYNHCA4EWTC24WTMZC5/bundle.json","state_url":"https://pith.science/pith/T2GB2AZRYNHCA4EWTC24WTMZC5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/T2GB2AZRYNHCA4EWTC24WTMZC5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T21:01:13Z","links":{"resolver":"https://pith.science/pith/T2GB2AZRYNHCA4EWTC24WTMZC5","bundle":"https://pith.science/pith/T2GB2AZRYNHCA4EWTC24WTMZC5/bundle.json","state":"https://pith.science/pith/T2GB2AZRYNHCA4EWTC24WTMZC5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/T2GB2AZRYNHCA4EWTC24WTMZC5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:T2GB2AZRYNHCA4EWTC24WTMZC5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0e9ff83ba45b44639477a9937a16e6849187fdd7cb9bf76388f2f4819a276ec9","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-18T01:19:32Z","title_canon_sha256":"123928a085b5cddd086ec3a3eaa9fe11177759e0dfb4a489774cd9935b7d603b"},"schema_version":"1.0","source":{"id":"2606.19684","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.19684","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"arxiv_version","alias_value":"2606.19684v1","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19684","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_12","alias_value":"T2GB2AZRYNHC","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_16","alias_value":"T2GB2AZRYNHCA4EW","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_8","alias_value":"T2GB2AZR","created_at":"2026-06-19T16:12:32Z"}],"graph_snapshots":[{"event_id":"sha256:8702c17ed5e26e742e449bcc749afabe475e1919002ec362de8cb6356cc46a94","target":"graph","created_at":"2026-06-19T16:12:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.19684/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Composed image retrieval retrieves a target image using a composed query of a reference image and a modified text description. In the fashion domain, this task requires understanding subtle attribute variations such as color, pattern, and texture. However, existing approaches face limitations due to scarce annotated data and simplistic negative sampling. We propose a novel framework that integrates a multi-modal large language model (LLaVA) to generate attribute-aware triplets and introduces a two-stage fine-tuning strategy to enhance contrastive learning. We leverage pretrained vision-languag","authors_text":"Hoang Bui Le, Nam Vo Hoang, Nguyen Cao Hoang, Trung-Nghia Le","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-18T01:19:32Z","title":"Exploring Multi-Modal Large Language Models and Two-Stage Fine-Tuning for Fashion Image Retrieval"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19684","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dcd8f58de6721a4c559998899036a48c77280d9d1f3c0ec045b1097f4a73f459","target":"record","created_at":"2026-06-19T16:12:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0e9ff83ba45b44639477a9937a16e6849187fdd7cb9bf76388f2f4819a276ec9","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-18T01:19:32Z","title_canon_sha256":"123928a085b5cddd086ec3a3eaa9fe11177759e0dfb4a489774cd9935b7d603b"},"schema_version":"1.0","source":{"id":"2606.19684","kind":"arxiv","version":1}},"canonical_sha256":"9e8c1d0331c34e20709698b5cb4d99174ae7c56e9394122fd9e84b981a03f1e9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9e8c1d0331c34e20709698b5cb4d99174ae7c56e9394122fd9e84b981a03f1e9","first_computed_at":"2026-06-19T16:12:32.134689Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:12:32.134689Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hgtfOPVNxoZaOPwL6oatyFjsDXHu6tTApK4CrAPmDT9XXBEfzkuzJLBQoqON+BtHGXK3m8VXW8N9q3yP244UCA==","signature_status":"signed_v1","signed_at":"2026-06-19T16:12:32.135048Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.19684","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dcd8f58de6721a4c559998899036a48c77280d9d1f3c0ec045b1097f4a73f459","sha256:8702c17ed5e26e742e449bcc749afabe475e1919002ec362de8cb6356cc46a94"],"state_sha256":"57003d60765021f6e916314a43f4ac02b3a2c099bc3ec986259998559a55691d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nPCKg4JARtb12cGWSoDwSFUwuq9lnBN9z6i8J3hw8shxEpaVs62mW+iHG1kU47X/PQRcDODa0WncZPmILUkFDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T21:01:13.690851Z","bundle_sha256":"6cc78dc1d07c9d245eeb044c6a95ed8d7742f777b47f58fe9290869fbaa79469"}}