{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:KDVPM7QGZOHTGBUGBRKZAYTXIQ","short_pith_number":"pith:KDVPM7QG","schema_version":"1.0","canonical_sha256":"50eaf67e06cb8f3306860c55906277442415e89466213450dad699ac6aab4a64","source":{"kind":"arxiv","id":"2606.11105","version":1},"attestation_state":"computed","paper":{"title":"PhantomBench: Benchmarking the Non-existential Threat of Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Haeji Jung, Hila Gonen","submitted_at":"2026-06-09T17:03:19Z","abstract_excerpt":"Hallucinations, where language models (LMs) generate factually ungrounded responses, pose serious risks, as users tend to blindly rely on them. This is particularly concerning in high-stakes domains, where consequences of such model behavior can lead to significant harms. Despite notable progress in understanding hallucinations, it remains unclear how reliably these models can recognize the limits of their knowledge. We introduce PhantomBench, the first large-scale benchmark of its kind, comprising more than 60K non-existent terms and entities derived from real concepts across diverse domains."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.11105","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-09T17:03:19Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"e72e9219bb0184c7ffb18555ac05535460595e043ddf672f93d99776d2abab3e","abstract_canon_sha256":"2871f39108e03749739f10776fba797ba0bab55ed70e055224b100e2b24ffe4e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:11:10.501228Z","signature_b64":"eOLH3eYYRThuMk9wKIws0zcvvxkbsp0Wahw53/9cUC9zbKi2avW8SS30CB+BLXPNLFn00weQQjeEiMIvypShDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"50eaf67e06cb8f3306860c55906277442415e89466213450dad699ac6aab4a64","last_reissued_at":"2026-06-10T01:11:10.500416Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:11:10.500416Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PhantomBench: Benchmarking the Non-existential Threat of Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Haeji Jung, Hila Gonen","submitted_at":"2026-06-09T17:03:19Z","abstract_excerpt":"Hallucinations, where language models (LMs) generate factually ungrounded responses, pose serious risks, as users tend to blindly rely on them. This is particularly concerning in high-stakes domains, where consequences of such model behavior can lead to significant harms. Despite notable progress in understanding hallucinations, it remains unclear how reliably these models can recognize the limits of their knowledge. We introduce PhantomBench, the first large-scale benchmark of its kind, comprising more than 60K non-existent terms and entities derived from real concepts across diverse domains."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11105","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.11105/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.11105","created_at":"2026-06-10T01:11:10.500544+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.11105v1","created_at":"2026-06-10T01:11:10.500544+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11105","created_at":"2026-06-10T01:11:10.500544+00:00"},{"alias_kind":"pith_short_12","alias_value":"KDVPM7QGZOHT","created_at":"2026-06-10T01:11:10.500544+00:00"},{"alias_kind":"pith_short_16","alias_value":"KDVPM7QGZOHTGBUG","created_at":"2026-06-10T01:11:10.500544+00:00"},{"alias_kind":"pith_short_8","alias_value":"KDVPM7QG","created_at":"2026-06-10T01:11:10.500544+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ","json":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ.json","graph_json":"https://pith.science/api/pith-number/KDVPM7QGZOHTGBUGBRKZAYTXIQ/graph.json","events_json":"https://pith.science/api/pith-number/KDVPM7QGZOHTGBUGBRKZAYTXIQ/events.json","paper":"https://pith.science/paper/KDVPM7QG"},"agent_actions":{"view_html":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ","download_json":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ.json","view_paper":"https://pith.science/paper/KDVPM7QG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.11105&json=true","fetch_graph":"https://pith.science/api/pith-number/KDVPM7QGZOHTGBUGBRKZAYTXIQ/graph.json","fetch_events":"https://pith.science/api/pith-number/KDVPM7QGZOHTGBUGBRKZAYTXIQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ/action/storage_attestation","attest_author":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ/action/author_attestation","sign_citation":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ/action/citation_signature","submit_replication":"https://pith.science/pith/KDVPM7QGZOHTGBUGBRKZAYTXIQ/action/replication_record"}},"created_at":"2026-06-10T01:11:10.500544+00:00","updated_at":"2026-06-10T01:11:10.500544+00:00"}