{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:YGPZAUOIZLT6WLKKRTAEVAODJH","short_pith_number":"pith:YGPZAUOI","canonical_record":{"source":{"id":"2606.11702","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T06:26:52Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"ec3539a1e473764b6351ad653dfe1fa1fd48bc7f3d3d33cd8dc64b2e8bc2f627","abstract_canon_sha256":"10346df9c65b31ed5dabb60584e0838ba5a1ea91bf89ec844aa64597c925f670"},"schema_version":"1.0"},"canonical_sha256":"c19f9051c8cae7eb2d4a8cc04a81c349cbc5fb7c3d4a8acd4616560a5680f866","source":{"kind":"arxiv","id":"2606.11702","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11702","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11702v1","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11702","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"pith_short_12","alias_value":"YGPZAUOIZLT6","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"pith_short_16","alias_value":"YGPZAUOIZLT6WLKK","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"pith_short_8","alias_value":"YGPZAUOI","created_at":"2026-06-11T01:10:03Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:YGPZAUOIZLT6WLKKRTAEVAODJH","target":"record","payload":{"canonical_record":{"source":{"id":"2606.11702","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T06:26:52Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"ec3539a1e473764b6351ad653dfe1fa1fd48bc7f3d3d33cd8dc64b2e8bc2f627","abstract_canon_sha256":"10346df9c65b31ed5dabb60584e0838ba5a1ea91bf89ec844aa64597c925f670"},"schema_version":"1.0"},"canonical_sha256":"c19f9051c8cae7eb2d4a8cc04a81c349cbc5fb7c3d4a8acd4616560a5680f866","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T01:10:03.738013Z","signature_b64":"OeqxADNu65oU2SIcLA06gRNF8a+Q3gG8PxofkWQDsROD54c+3opd3fDyPCCYvCz9j9tNHQHulEizIjERK/hQCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c19f9051c8cae7eb2d4a8cc04a81c349cbc5fb7c3d4a8acd4616560a5680f866","last_reissued_at":"2026-06-11T01:10:03.737227Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T01:10:03.737227Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.11702","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-11T01:10:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DFGYDghNBdhEozlcySP8WouYMe6NSzPtmWdymUR+k6YNh4PIXg8gUAzo2i7OUhkOtggDPcwUegKRtiKnAu8ZBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T04:24:34.655579Z"},"content_sha256":"065ecbb791de0fa7d70735cc28836465f72a9208b386d95c52aa6b1a904a0046","schema_version":"1.0","event_id":"sha256:065ecbb791de0fa7d70735cc28836465f72a9208b386d95c52aa6b1a904a0046"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:YGPZAUOIZLT6WLKKRTAEVAODJH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MedCTA: A Benchmark for Clinical Tool Agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CV","authors_text":"Bernard Ghanem, Fida Mohammad Thoker, Hyewon Jeong, Tajamul Ashraf","submitted_at":"2026-06-10T06:26:52Z","abstract_excerpt":"To make clinically grounded decisions, medical AI agents are expected to go beyond simple recognition and be capable of tool retrieval, evidence acquisition, and integration. Existing benchmarks largely evaluate isolated perception or single-turn question answering, and therefore provide limited visibility into failures of planning, tool recruitment, and rollout reliability. We introduce MedCTA, a benchmark for evaluating medical tool agents on clinician-validated, step-implicit tasks grounded in realistic multimodal clinical inputs, including radiology images, pathology slides, and reports. M"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11702","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.11702/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-11T01:10:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"euyTBCSTps2L1ynRYkhcEuW08SkeKpZ0EsUiPBWaeHLK4h5lY0siXBxJnmNHY5fcIEaITODRi0udj2CFv0oDCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T04:24:34.655950Z"},"content_sha256":"4b1c9315fc891513c27c10121bb378723ef52bec4746abc943d92a0a325f4702","schema_version":"1.0","event_id":"sha256:4b1c9315fc891513c27c10121bb378723ef52bec4746abc943d92a0a325f4702"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YGPZAUOIZLT6WLKKRTAEVAODJH/bundle.json","state_url":"https://pith.science/pith/YGPZAUOIZLT6WLKKRTAEVAODJH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YGPZAUOIZLT6WLKKRTAEVAODJH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-28T04:24:34Z","links":{"resolver":"https://pith.science/pith/YGPZAUOIZLT6WLKKRTAEVAODJH","bundle":"https://pith.science/pith/YGPZAUOIZLT6WLKKRTAEVAODJH/bundle.json","state":"https://pith.science/pith/YGPZAUOIZLT6WLKKRTAEVAODJH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YGPZAUOIZLT6WLKKRTAEVAODJH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YGPZAUOIZLT6WLKKRTAEVAODJH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"10346df9c65b31ed5dabb60584e0838ba5a1ea91bf89ec844aa64597c925f670","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T06:26:52Z","title_canon_sha256":"ec3539a1e473764b6351ad653dfe1fa1fd48bc7f3d3d33cd8dc64b2e8bc2f627"},"schema_version":"1.0","source":{"id":"2606.11702","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11702","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11702v1","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11702","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"pith_short_12","alias_value":"YGPZAUOIZLT6","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"pith_short_16","alias_value":"YGPZAUOIZLT6WLKK","created_at":"2026-06-11T01:10:03Z"},{"alias_kind":"pith_short_8","alias_value":"YGPZAUOI","created_at":"2026-06-11T01:10:03Z"}],"graph_snapshots":[{"event_id":"sha256:4b1c9315fc891513c27c10121bb378723ef52bec4746abc943d92a0a325f4702","target":"graph","created_at":"2026-06-11T01:10:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.11702/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"To make clinically grounded decisions, medical AI agents are expected to go beyond simple recognition and be capable of tool retrieval, evidence acquisition, and integration. Existing benchmarks largely evaluate isolated perception or single-turn question answering, and therefore provide limited visibility into failures of planning, tool recruitment, and rollout reliability. We introduce MedCTA, a benchmark for evaluating medical tool agents on clinician-validated, step-implicit tasks grounded in realistic multimodal clinical inputs, including radiology images, pathology slides, and reports. M","authors_text":"Bernard Ghanem, Fida Mohammad Thoker, Hyewon Jeong, Tajamul Ashraf","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T06:26:52Z","title":"MedCTA: A Benchmark for Clinical Tool Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11702","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:065ecbb791de0fa7d70735cc28836465f72a9208b386d95c52aa6b1a904a0046","target":"record","created_at":"2026-06-11T01:10:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"10346df9c65b31ed5dabb60584e0838ba5a1ea91bf89ec844aa64597c925f670","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T06:26:52Z","title_canon_sha256":"ec3539a1e473764b6351ad653dfe1fa1fd48bc7f3d3d33cd8dc64b2e8bc2f627"},"schema_version":"1.0","source":{"id":"2606.11702","kind":"arxiv","version":1}},"canonical_sha256":"c19f9051c8cae7eb2d4a8cc04a81c349cbc5fb7c3d4a8acd4616560a5680f866","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c19f9051c8cae7eb2d4a8cc04a81c349cbc5fb7c3d4a8acd4616560a5680f866","first_computed_at":"2026-06-11T01:10:03.737227Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-11T01:10:03.737227Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OeqxADNu65oU2SIcLA06gRNF8a+Q3gG8PxofkWQDsROD54c+3opd3fDyPCCYvCz9j9tNHQHulEizIjERK/hQCw==","signature_status":"signed_v1","signed_at":"2026-06-11T01:10:03.738013Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.11702","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:065ecbb791de0fa7d70735cc28836465f72a9208b386d95c52aa6b1a904a0046","sha256:4b1c9315fc891513c27c10121bb378723ef52bec4746abc943d92a0a325f4702"],"state_sha256":"3263e76c1487ae2ef2148f41253ce84c206f907775653786774122573bcfb73f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rFNBRCPMl05V7TD6XsCx5DR6Q/brP4HF6hl4xbpCrRfA4UntPewAGwwzfDexxwHVLZd7Klb1/0saDv2gOGimAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-28T04:24:34.657982Z","bundle_sha256":"3d61b8b3fafd6378c89a22548f67743002869c82ece27dab5ba29e0c7997ad17"}}