{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:VKKO2XHTTSLSVJRQN4XS64IT5R","short_pith_number":"pith:VKKO2XHT","schema_version":"1.0","canonical_sha256":"aa94ed5cf39c972aa6306f2f2f7113ec597d69332de7e572d5ed7979c2cf5695","source":{"kind":"arxiv","id":"2605.15589","version":1},"attestation_state":"computed","paper":{"title":"MHGraphBench: Knowledge Graph-Grounded Benchmarking of Mental Health Knowledge in Large Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bradley A. Malin, Congning Ni, Murat Kantarcioglu, Shelagh A. Mulvaney, Susannah L. Rose, Weixin Liu, Zhijun Yin","submitted_at":"2026-05-15T03:55:27Z","abstract_excerpt":"Large language models (LLMs) are increasingly used in the mental health domain, yet it remains unclear how well they capture related biomedical knowledge and how reliably they apply it to clinically salient structured judgments. Here, we present a knowledge-graph (KG)-grounded benchmark for assessing LLMs on mental-health entity recognition, relation judgment, and two-hop reasoning. The benchmark is derived from PrimeKG and comprises nine task families with KG-supported answers and controlled negative options. Experiments across 15 closed- and open-source LLMs reveal a persistent recognition-t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.15589","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-15T03:55:27Z","cross_cats_sorted":[],"title_canon_sha256":"92ecbd32fd359f27937df82b298067395ce44a0786ea395d1294281152d7ba88","abstract_canon_sha256":"b49a98370fde90a1c7cda64e9ce9d2cb14cea34e473e4c0c0ec1d92f4675175d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:06.930488Z","signature_b64":"+DgjrkCPxQPy3N62LFp8mEpT5FJJDI+imKxG6cx1IR/tq6g0Bh5jKE4K+48f6ez0Chrn5cmtnP05H7WtNJU4BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"aa94ed5cf39c972aa6306f2f2f7113ec597d69332de7e572d5ed7979c2cf5695","last_reissued_at":"2026-05-20T00:01:06.929759Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:06.929759Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MHGraphBench: Knowledge Graph-Grounded Benchmarking of Mental Health Knowledge in Large Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bradley A. Malin, Congning Ni, Murat Kantarcioglu, Shelagh A. Mulvaney, Susannah L. Rose, Weixin Liu, Zhijun Yin","submitted_at":"2026-05-15T03:55:27Z","abstract_excerpt":"Large language models (LLMs) are increasingly used in the mental health domain, yet it remains unclear how well they capture related biomedical knowledge and how reliably they apply it to clinically salient structured judgments. Here, we present a knowledge-graph (KG)-grounded benchmark for assessing LLMs on mental-health entity recognition, relation judgment, and two-hop reasoning. The benchmark is derived from PrimeKG and comprises nine task families with KG-supported answers and controlled negative options. Experiments across 15 closed- and open-source LLMs reveal a persistent recognition-t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.15589","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15589/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T19:34:35.242087Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T17:41:56.063980Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"f3268af58e4c4a3fcdab18a3d6ed1f73b057c9fbff8cef82b7c7bff7bbdc7fd5"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15589","created_at":"2026-05-20T00:01:06.929882+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15589v1","created_at":"2026-05-20T00:01:06.929882+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15589","created_at":"2026-05-20T00:01:06.929882+00:00"},{"alias_kind":"pith_short_12","alias_value":"VKKO2XHTTSLS","created_at":"2026-05-20T00:01:06.929882+00:00"},{"alias_kind":"pith_short_16","alias_value":"VKKO2XHTTSLSVJRQ","created_at":"2026-05-20T00:01:06.929882+00:00"},{"alias_kind":"pith_short_8","alias_value":"VKKO2XHT","created_at":"2026-05-20T00:01:06.929882+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R","json":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R.json","graph_json":"https://pith.science/api/pith-number/VKKO2XHTTSLSVJRQN4XS64IT5R/graph.json","events_json":"https://pith.science/api/pith-number/VKKO2XHTTSLSVJRQN4XS64IT5R/events.json","paper":"https://pith.science/paper/VKKO2XHT"},"agent_actions":{"view_html":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R","download_json":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R.json","view_paper":"https://pith.science/paper/VKKO2XHT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15589&json=true","fetch_graph":"https://pith.science/api/pith-number/VKKO2XHTTSLSVJRQN4XS64IT5R/graph.json","fetch_events":"https://pith.science/api/pith-number/VKKO2XHTTSLSVJRQN4XS64IT5R/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R/action/storage_attestation","attest_author":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R/action/author_attestation","sign_citation":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R/action/citation_signature","submit_replication":"https://pith.science/pith/VKKO2XHTTSLSVJRQN4XS64IT5R/action/replication_record"}},"created_at":"2026-05-20T00:01:06.929882+00:00","updated_at":"2026-05-20T00:01:06.929882+00:00"}