{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:RZHS77OVQHUROCXDUYTTPBFAPH","short_pith_number":"pith:RZHS77OV","schema_version":"1.0","canonical_sha256":"8e4f2ffdd581e9170ae3a6273784a079d13ff0f9d80c98a71f972f9f1300faa1","source":{"kind":"arxiv","id":"2509.02292","version":2},"attestation_state":"computed","paper":{"title":"LLMs and their Limited Theory of Mind: Evaluating Mental State Annotations in Situated Dialogue","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Katharine Kowalyshyn, Matthias Scheutz","submitted_at":"2025-09-02T13:11:24Z","abstract_excerpt":"What if large language models could not only infer human mindsets but also expose every blind spot in team dialogue such as discrepancies in the team members' joint understanding? We present a novel, two-step framework that leverages large language models (LLMs) both as human-style annotators of team dialogues to track the team's shared mental models (SMMs) and as automated discrepancy detectors among individuals' mental states. In the first step, an LLM generates annotations by identifying SMM elements within task-oriented dialogues from the Cooperative Remote Search Task (CReST) corpus. Then"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2509.02292","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-09-02T13:11:24Z","cross_cats_sorted":[],"title_canon_sha256":"3d6885f8e9dfe37f0d0854c5ed5976f31795bbde7e37bcfba068f81a79098621","abstract_canon_sha256":"1f6696a0265ed13b731af4f38737e9fc15a8152f197be10e7dc24ef3e1f8531b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T01:17:25.635568Z","signature_b64":"gjsAoYFDQIa0U81c8zozcju5/S7DnqFSDvP10VgWpSc5Fa2RQJ8iKfwtRoM8EImMi6CbXkokGVG7H2/2UMrsAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8e4f2ffdd581e9170ae3a6273784a079d13ff0f9d80c98a71f972f9f1300faa1","last_reissued_at":"2026-06-30T01:17:25.634928Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T01:17:25.634928Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"LLMs and their Limited Theory of Mind: Evaluating Mental State Annotations in Situated Dialogue","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Katharine Kowalyshyn, Matthias Scheutz","submitted_at":"2025-09-02T13:11:24Z","abstract_excerpt":"What if large language models could not only infer human mindsets but also expose every blind spot in team dialogue such as discrepancies in the team members' joint understanding? We present a novel, two-step framework that leverages large language models (LLMs) both as human-style annotators of team dialogues to track the team's shared mental models (SMMs) and as automated discrepancy detectors among individuals' mental states. In the first step, an LLM generates annotations by identifying SMM elements within task-oriented dialogues from the Cooperative Remote Search Task (CReST) corpus. Then"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.02292","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.02292/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2509.02292","created_at":"2026-06-30T01:17:25.635012+00:00"},{"alias_kind":"arxiv_version","alias_value":"2509.02292v2","created_at":"2026-06-30T01:17:25.635012+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.02292","created_at":"2026-06-30T01:17:25.635012+00:00"},{"alias_kind":"pith_short_12","alias_value":"RZHS77OVQHUR","created_at":"2026-06-30T01:17:25.635012+00:00"},{"alias_kind":"pith_short_16","alias_value":"RZHS77OVQHUROCXD","created_at":"2026-06-30T01:17:25.635012+00:00"},{"alias_kind":"pith_short_8","alias_value":"RZHS77OV","created_at":"2026-06-30T01:17:25.635012+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2604.21933","citing_title":"Not Another EHR: Reimagining Physician Information Needs with Generative AI Technology","ref_index":21,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH","json":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH.json","graph_json":"https://pith.science/api/pith-number/RZHS77OVQHUROCXDUYTTPBFAPH/graph.json","events_json":"https://pith.science/api/pith-number/RZHS77OVQHUROCXDUYTTPBFAPH/events.json","paper":"https://pith.science/paper/RZHS77OV"},"agent_actions":{"view_html":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH","download_json":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH.json","view_paper":"https://pith.science/paper/RZHS77OV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2509.02292&json=true","fetch_graph":"https://pith.science/api/pith-number/RZHS77OVQHUROCXDUYTTPBFAPH/graph.json","fetch_events":"https://pith.science/api/pith-number/RZHS77OVQHUROCXDUYTTPBFAPH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH/action/storage_attestation","attest_author":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH/action/author_attestation","sign_citation":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH/action/citation_signature","submit_replication":"https://pith.science/pith/RZHS77OVQHUROCXDUYTTPBFAPH/action/replication_record"}},"created_at":"2026-06-30T01:17:25.635012+00:00","updated_at":"2026-06-30T01:17:25.635012+00:00"}