{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:AOFWR4MGVOQLMZWOFRPQOAYUC4","short_pith_number":"pith:AOFWR4MG","canonical_record":{"source":{"id":"2606.31407","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-30T09:35:20Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"e46d3855063ee725a235f52ffaff092a63443c5ab26131b53875d0a71e6b5aa7","abstract_canon_sha256":"5cffe26146f27f835c3877b076bd99593a5b133e12f76164820e73409753452c"},"schema_version":"1.0"},"canonical_sha256":"038b68f186aba0b666ce2c5f0703141711d1dc444a8354ffc0586b5692d02797","source":{"kind":"arxiv","id":"2606.31407","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.31407","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"arxiv_version","alias_value":"2606.31407v1","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.31407","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"pith_short_12","alias_value":"AOFWR4MGVOQL","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"pith_short_16","alias_value":"AOFWR4MGVOQLMZWO","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"pith_short_8","alias_value":"AOFWR4MG","created_at":"2026-07-01T01:18:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:AOFWR4MGVOQLMZWOFRPQOAYUC4","target":"record","payload":{"canonical_record":{"source":{"id":"2606.31407","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-30T09:35:20Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"e46d3855063ee725a235f52ffaff092a63443c5ab26131b53875d0a71e6b5aa7","abstract_canon_sha256":"5cffe26146f27f835c3877b076bd99593a5b133e12f76164820e73409753452c"},"schema_version":"1.0"},"canonical_sha256":"038b68f186aba0b666ce2c5f0703141711d1dc444a8354ffc0586b5692d02797","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-01T01:18:02.072719Z","signature_b64":"VrztcjmdOnfB8kAwImUnjsr5jJPbjzym4JKiOb8RYAFK1d8LAIdj5H/hfOcuJ6eYNDv4omrGnMQkD1fTHZsSBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"038b68f186aba0b666ce2c5f0703141711d1dc444a8354ffc0586b5692d02797","last_reissued_at":"2026-07-01T01:18:02.072279Z","signature_status":"signed_v1","first_computed_at":"2026-07-01T01:18:02.072279Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.31407","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-01T01:18:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"a2MdLpy8AcmpKZn3ix8XznOSGBHFpxheuytdNci+dxc+DA3ixl3UOqbzMMEXGtiwPjeSXHoJJNMY7p9Wt0CABQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T22:21:59.601846Z"},"content_sha256":"539c5ecda54889365cda846e7d9c3a6f7a0f5892846d0e2706f808ac3e66b5ff","schema_version":"1.0","event_id":"sha256:539c5ecda54889365cda846e7d9c3a6f7a0f5892846d0e2706f808ac3e66b5ff"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:AOFWR4MGVOQLMZWOFRPQOAYUC4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Visual Semantic Entropy: Do Vision Language Models Recognize Visual Ambiguity?","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CV","authors_text":"Ankit Yadav, Johan W. Verjans, Minh-Son To, Ta Duc Huy, Townim Chowdhury, Trang Nguyen, Vu Minh Hieu Phan, Zhibin Liao","submitted_at":"2026-06-30T09:35:20Z","abstract_excerpt":"Vision-language models can produce confident answers on visually ambiguous inputs, resulting in biased predictions. Common entropy-based methods, such as Semantic Entropy (SE), rely on output diversity. Yet our analysis shows that overconfident visual embeddings suppress output diversity under stochastic decoding, causing SE to underestimate uncertainty in such cases. Recent methods instead probe output diversity through input perturbations, including textual paraphrasing or joint text-image perturbations, and show improved performance. We study these approaches and reveals that the resulting "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.31407","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.31407/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-01T01:18:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KdpiZhKsp+H9wvIF6DvNOW3eJ8VWszSg4EDrbwABrhO3RgBtk96lbtzBitbpDP+gS3O3oPEiPq9+81CdnH8ECw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T22:21:59.602220Z"},"content_sha256":"d04a166afe4d94d1cbf0256c8918b1588e46c2095d564c2820ba9963087270a1","schema_version":"1.0","event_id":"sha256:d04a166afe4d94d1cbf0256c8918b1588e46c2095d564c2820ba9963087270a1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AOFWR4MGVOQLMZWOFRPQOAYUC4/bundle.json","state_url":"https://pith.science/pith/AOFWR4MGVOQLMZWOFRPQOAYUC4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AOFWR4MGVOQLMZWOFRPQOAYUC4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-01T22:21:59Z","links":{"resolver":"https://pith.science/pith/AOFWR4MGVOQLMZWOFRPQOAYUC4","bundle":"https://pith.science/pith/AOFWR4MGVOQLMZWOFRPQOAYUC4/bundle.json","state":"https://pith.science/pith/AOFWR4MGVOQLMZWOFRPQOAYUC4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AOFWR4MGVOQLMZWOFRPQOAYUC4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AOFWR4MGVOQLMZWOFRPQOAYUC4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5cffe26146f27f835c3877b076bd99593a5b133e12f76164820e73409753452c","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-30T09:35:20Z","title_canon_sha256":"e46d3855063ee725a235f52ffaff092a63443c5ab26131b53875d0a71e6b5aa7"},"schema_version":"1.0","source":{"id":"2606.31407","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.31407","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"arxiv_version","alias_value":"2606.31407v1","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.31407","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"pith_short_12","alias_value":"AOFWR4MGVOQL","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"pith_short_16","alias_value":"AOFWR4MGVOQLMZWO","created_at":"2026-07-01T01:18:02Z"},{"alias_kind":"pith_short_8","alias_value":"AOFWR4MG","created_at":"2026-07-01T01:18:02Z"}],"graph_snapshots":[{"event_id":"sha256:d04a166afe4d94d1cbf0256c8918b1588e46c2095d564c2820ba9963087270a1","target":"graph","created_at":"2026-07-01T01:18:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.31407/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Vision-language models can produce confident answers on visually ambiguous inputs, resulting in biased predictions. Common entropy-based methods, such as Semantic Entropy (SE), rely on output diversity. Yet our analysis shows that overconfident visual embeddings suppress output diversity under stochastic decoding, causing SE to underestimate uncertainty in such cases. Recent methods instead probe output diversity through input perturbations, including textual paraphrasing or joint text-image perturbations, and show improved performance. We study these approaches and reveals that the resulting ","authors_text":"Ankit Yadav, Johan W. Verjans, Minh-Son To, Ta Duc Huy, Townim Chowdhury, Trang Nguyen, Vu Minh Hieu Phan, Zhibin Liao","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-30T09:35:20Z","title":"Visual Semantic Entropy: Do Vision Language Models Recognize Visual Ambiguity?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.31407","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:539c5ecda54889365cda846e7d9c3a6f7a0f5892846d0e2706f808ac3e66b5ff","target":"record","created_at":"2026-07-01T01:18:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5cffe26146f27f835c3877b076bd99593a5b133e12f76164820e73409753452c","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-30T09:35:20Z","title_canon_sha256":"e46d3855063ee725a235f52ffaff092a63443c5ab26131b53875d0a71e6b5aa7"},"schema_version":"1.0","source":{"id":"2606.31407","kind":"arxiv","version":1}},"canonical_sha256":"038b68f186aba0b666ce2c5f0703141711d1dc444a8354ffc0586b5692d02797","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"038b68f186aba0b666ce2c5f0703141711d1dc444a8354ffc0586b5692d02797","first_computed_at":"2026-07-01T01:18:02.072279Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-01T01:18:02.072279Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VrztcjmdOnfB8kAwImUnjsr5jJPbjzym4JKiOb8RYAFK1d8LAIdj5H/hfOcuJ6eYNDv4omrGnMQkD1fTHZsSBg==","signature_status":"signed_v1","signed_at":"2026-07-01T01:18:02.072719Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.31407","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:539c5ecda54889365cda846e7d9c3a6f7a0f5892846d0e2706f808ac3e66b5ff","sha256:d04a166afe4d94d1cbf0256c8918b1588e46c2095d564c2820ba9963087270a1"],"state_sha256":"190e3c8c43017652b80a59b4dc62b375ac516973b3e503084d184e9a5ed26a0f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sAhZ0UTH+9312YvKHm54C0ftDJa9rVRCWnvk78qCZenau81IhhVD7Cx8Q1z3LKJkpZKtCv3wTO8koutwJ2a3Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-01T22:21:59.604250Z","bundle_sha256":"a7d5743e9796d8b787e4bb8d21b5088cf9ac17fb3d6bc709a4f8c82c197431c2"}}