{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:YMW2MDHJMIZ3CUXESHWNWOJJ6K","short_pith_number":"pith:YMW2MDHJ","schema_version":"1.0","canonical_sha256":"c32da60ce96233b152e491ecdb3929f28ee7a399948604e7c30916173d95b6a2","source":{"kind":"arxiv","id":"2606.24883","version":1},"attestation_state":"computed","paper":{"title":"BenchX: Benchmarking AI Models for Cancer Detection and Localization with Demographic and Protocol Biases","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Akshay S. Chaudhari, Alan L. Yuille, Ashwin Kumar, Curtis Langlotz, Ibrahim Ethem Hamamci, Jakob Wasserthal, Kang Wang, Pedro R. A. S. Bassi, Qi Chen, Sezgin Er, Wenxuan Li, Xinze Zhou, Yang Yang, Yiwen Ye, Yuhan Wang, Yuyin Zhou, Zongwei Zhou","submitted_at":"2026-06-23T17:58:59Z","abstract_excerpt":"Artificial intelligence (AI) has achieved remarkable success in medical imaging, but it is widely recognized that these models often perform inconsistently across real-world clinical settings. Such inconsistencies occur when patient demographics and imaging protocols vary, for example, in detecting small tumors, analyzing scans from different contrast phases, or evaluating patients of different ages or sexes. To quantify these inconsistencies, we develop a large-scale, open benchmark of 85,355 CT scans that systematically evaluates 12 tumor-detection AI models across tumor size, location, pati"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.24883","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T17:58:59Z","cross_cats_sorted":[],"title_canon_sha256":"a390b3f915906a8a8053d2d73a3f2ce7d03e417db150c27a3135f112e840648e","abstract_canon_sha256":"56ac369bef05e0ab9c41c40428e6a945dcca5de89401436245b5d7a07f61af91"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:15:45.350301Z","signature_b64":"JL7apTrNrl52bGy45y/Eog0WhQ3wjMcKRKaVe+r1Sl3LCzVnoq8w9NhZH4UI1vZEusOC3vFqI7UY/HC2dy+NDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c32da60ce96233b152e491ecdb3929f28ee7a399948604e7c30916173d95b6a2","last_reissued_at":"2026-06-24T01:15:45.349926Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:15:45.349926Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"BenchX: Benchmarking AI Models for Cancer Detection and Localization with Demographic and Protocol Biases","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Akshay S. Chaudhari, Alan L. Yuille, Ashwin Kumar, Curtis Langlotz, Ibrahim Ethem Hamamci, Jakob Wasserthal, Kang Wang, Pedro R. A. S. Bassi, Qi Chen, Sezgin Er, Wenxuan Li, Xinze Zhou, Yang Yang, Yiwen Ye, Yuhan Wang, Yuyin Zhou, Zongwei Zhou","submitted_at":"2026-06-23T17:58:59Z","abstract_excerpt":"Artificial intelligence (AI) has achieved remarkable success in medical imaging, but it is widely recognized that these models often perform inconsistently across real-world clinical settings. Such inconsistencies occur when patient demographics and imaging protocols vary, for example, in detecting small tumors, analyzing scans from different contrast phases, or evaluating patients of different ages or sexes. To quantify these inconsistencies, we develop a large-scale, open benchmark of 85,355 CT scans that systematically evaluates 12 tumor-detection AI models across tumor size, location, pati"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24883","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.24883/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.24883","created_at":"2026-06-24T01:15:45.349987+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.24883v1","created_at":"2026-06-24T01:15:45.349987+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24883","created_at":"2026-06-24T01:15:45.349987+00:00"},{"alias_kind":"pith_short_12","alias_value":"YMW2MDHJMIZ3","created_at":"2026-06-24T01:15:45.349987+00:00"},{"alias_kind":"pith_short_16","alias_value":"YMW2MDHJMIZ3CUXE","created_at":"2026-06-24T01:15:45.349987+00:00"},{"alias_kind":"pith_short_8","alias_value":"YMW2MDHJ","created_at":"2026-06-24T01:15:45.349987+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K","json":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K.json","graph_json":"https://pith.science/api/pith-number/YMW2MDHJMIZ3CUXESHWNWOJJ6K/graph.json","events_json":"https://pith.science/api/pith-number/YMW2MDHJMIZ3CUXESHWNWOJJ6K/events.json","paper":"https://pith.science/paper/YMW2MDHJ"},"agent_actions":{"view_html":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K","download_json":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K.json","view_paper":"https://pith.science/paper/YMW2MDHJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.24883&json=true","fetch_graph":"https://pith.science/api/pith-number/YMW2MDHJMIZ3CUXESHWNWOJJ6K/graph.json","fetch_events":"https://pith.science/api/pith-number/YMW2MDHJMIZ3CUXESHWNWOJJ6K/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K/action/storage_attestation","attest_author":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K/action/author_attestation","sign_citation":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K/action/citation_signature","submit_replication":"https://pith.science/pith/YMW2MDHJMIZ3CUXESHWNWOJJ6K/action/replication_record"}},"created_at":"2026-06-24T01:15:45.349987+00:00","updated_at":"2026-06-24T01:15:45.349987+00:00"}