{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:N624Z4RVNIXXR5ALZTL4QDNWQJ","short_pith_number":"pith:N624Z4RV","schema_version":"1.0","canonical_sha256":"6fb5ccf2356a2f78f40bccd7c80db68265982ddc7986642a2bc5f5b21a30b04f","source":{"kind":"arxiv","id":"2606.17474","version":1},"attestation_state":"computed","paper":{"title":"AIPatient Arena: EHR-grounded evaluation of large language models in end-to-end clinical consultation workflows","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Bryan YP Yan, Guangxin Dai, Huizi Yu, Jiahui Niu, Jingxian He, Kent CY So, Lizhou Fan, Wenkong Wang, Xiang Li, Xin Ma, Xinxin Lin, Yanqiu Xing, Yun Kwok Wing, Zhiying Liang","submitted_at":"2026-06-16T03:35:17Z","abstract_excerpt":"Large language models (LLMs) are increasingly considered for use in clinical consultation tasks, yet most medical evaluations remain static, single-turn, or narrowly outcome-based, limiting their ability to reflect the sequential, uncertain, and interactive nature of real-world care. Here, we propose AIPatient Arena, an EHRs-grounded evaluation framework for assessing the clinical utility of LLMs across eight dimensions of clinical competence. The framework integrates EHR data into patient-specific knowledge graphs, enabling multi-turn physician-patient interactions. We applied AIPatient Arena"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.17474","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-16T03:35:17Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"97f9b09a7b9f3b4cefe6d452d1a3816f89f585c11e718a37c00ba221cd16f693","abstract_canon_sha256":"fd4f1c305a006cb793f86944be2427c96e0e25e327fc284458f477d9e81c7edf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:13.980148Z","signature_b64":"1jVOlVxPOLa7J/1nd2JBJlc424+gi/4NsLHiSA+YSZUIGAiEWrRIxbrGeyjhiNmuXmGjQgbac9VRRZV7GChuBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6fb5ccf2356a2f78f40bccd7c80db68265982ddc7986642a2bc5f5b21a30b04f","last_reissued_at":"2026-06-19T16:10:13.979777Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:13.979777Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"AIPatient Arena: EHR-grounded evaluation of large language models in end-to-end clinical consultation workflows","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Bryan YP Yan, Guangxin Dai, Huizi Yu, Jiahui Niu, Jingxian He, Kent CY So, Lizhou Fan, Wenkong Wang, Xiang Li, Xin Ma, Xinxin Lin, Yanqiu Xing, Yun Kwok Wing, Zhiying Liang","submitted_at":"2026-06-16T03:35:17Z","abstract_excerpt":"Large language models (LLMs) are increasingly considered for use in clinical consultation tasks, yet most medical evaluations remain static, single-turn, or narrowly outcome-based, limiting their ability to reflect the sequential, uncertain, and interactive nature of real-world care. Here, we propose AIPatient Arena, an EHRs-grounded evaluation framework for assessing the clinical utility of LLMs across eight dimensions of clinical competence. The framework integrates EHR data into patient-specific knowledge graphs, enabling multi-turn physician-patient interactions. We applied AIPatient Arena"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17474","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.17474/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.17474","created_at":"2026-06-19T16:10:13.979840+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.17474v1","created_at":"2026-06-19T16:10:13.979840+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17474","created_at":"2026-06-19T16:10:13.979840+00:00"},{"alias_kind":"pith_short_12","alias_value":"N624Z4RVNIXX","created_at":"2026-06-19T16:10:13.979840+00:00"},{"alias_kind":"pith_short_16","alias_value":"N624Z4RVNIXXR5AL","created_at":"2026-06-19T16:10:13.979840+00:00"},{"alias_kind":"pith_short_8","alias_value":"N624Z4RV","created_at":"2026-06-19T16:10:13.979840+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ","json":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ.json","graph_json":"https://pith.science/api/pith-number/N624Z4RVNIXXR5ALZTL4QDNWQJ/graph.json","events_json":"https://pith.science/api/pith-number/N624Z4RVNIXXR5ALZTL4QDNWQJ/events.json","paper":"https://pith.science/paper/N624Z4RV"},"agent_actions":{"view_html":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ","download_json":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ.json","view_paper":"https://pith.science/paper/N624Z4RV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.17474&json=true","fetch_graph":"https://pith.science/api/pith-number/N624Z4RVNIXXR5ALZTL4QDNWQJ/graph.json","fetch_events":"https://pith.science/api/pith-number/N624Z4RVNIXXR5ALZTL4QDNWQJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ/action/storage_attestation","attest_author":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ/action/author_attestation","sign_citation":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ/action/citation_signature","submit_replication":"https://pith.science/pith/N624Z4RVNIXXR5ALZTL4QDNWQJ/action/replication_record"}},"created_at":"2026-06-19T16:10:13.979840+00:00","updated_at":"2026-06-19T16:10:13.979840+00:00"}