{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:NELB2PSCO25RP37K6XFVOQGXES","short_pith_number":"pith:NELB2PSC","schema_version":"1.0","canonical_sha256":"69161d3e4276bb17efeaf5cb5740d724a6f4514c0ca31602c954c35c9b5b2043","source":{"kind":"arxiv","id":"2606.12730","version":1},"attestation_state":"computed","paper":{"title":"Rethinking Psychometric Evaluation of LLMs: When and Why Self-Reports Predict Behavior","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.CY","cs.LG"],"primary_cat":"cs.AI","authors_text":"Anima Anandkumar, Dean Mobbs, Myrl G. Marmarelis, Peiyang Song, Pengrui Han, Rafal Kocielnik, Ramit Debnath, R. Michael Alvarez","submitted_at":"2026-06-10T22:28:53Z","abstract_excerpt":"Anticipating LLM behavioral tendencies from low-cost psychometric probes is critical for safe deployment, but only if self-reports (SR) reliably predict behavior. Recent work documented substantial SR-behavior dissociation in LLMs, but relied on broad personality traits (Big 5) that predict specific behaviors weakly, even in humans. Furthermore, the isolation of conversational sessions combined with weak context matching left open whether LLMs truly lack coherence or whether the conditions needed to detect such coherence were not met. We contrast Big 5 with the Theory of Planned Behavior (TPB)"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.12730","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-10T22:28:53Z","cross_cats_sorted":["cs.CL","cs.CY","cs.LG"],"title_canon_sha256":"408093690e9c91a500cd378a23885976544aff1bda04c025bea609b4f0494a6c","abstract_canon_sha256":"26c08c3ddff9cb677c0696cc41fb726fc74e8ea3f63daa905e0699fb9bff7f32"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:08:47.754998Z","signature_b64":"sg6Q5ELlhGrVgILT3lq4gOOIMiLGEA0U9yU66q3bI11o+QTlnc9y3sklO+I+yWyrFVt3+ffw/3q2N0XoVwoeDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"69161d3e4276bb17efeaf5cb5740d724a6f4514c0ca31602c954c35c9b5b2043","last_reissued_at":"2026-06-12T01:08:47.754085Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:08:47.754085Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Rethinking Psychometric Evaluation of LLMs: When and Why Self-Reports Predict Behavior","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.CY","cs.LG"],"primary_cat":"cs.AI","authors_text":"Anima Anandkumar, Dean Mobbs, Myrl G. Marmarelis, Peiyang Song, Pengrui Han, Rafal Kocielnik, Ramit Debnath, R. Michael Alvarez","submitted_at":"2026-06-10T22:28:53Z","abstract_excerpt":"Anticipating LLM behavioral tendencies from low-cost psychometric probes is critical for safe deployment, but only if self-reports (SR) reliably predict behavior. Recent work documented substantial SR-behavior dissociation in LLMs, but relied on broad personality traits (Big 5) that predict specific behaviors weakly, even in humans. Furthermore, the isolation of conversational sessions combined with weak context matching left open whether LLMs truly lack coherence or whether the conditions needed to detect such coherence were not met. We contrast Big 5 with the Theory of Planned Behavior (TPB)"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.12730","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.12730/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.12730","created_at":"2026-06-12T01:08:47.754232+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.12730v1","created_at":"2026-06-12T01:08:47.754232+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.12730","created_at":"2026-06-12T01:08:47.754232+00:00"},{"alias_kind":"pith_short_12","alias_value":"NELB2PSCO25R","created_at":"2026-06-12T01:08:47.754232+00:00"},{"alias_kind":"pith_short_16","alias_value":"NELB2PSCO25RP37K","created_at":"2026-06-12T01:08:47.754232+00:00"},{"alias_kind":"pith_short_8","alias_value":"NELB2PSC","created_at":"2026-06-12T01:08:47.754232+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES","json":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES.json","graph_json":"https://pith.science/api/pith-number/NELB2PSCO25RP37K6XFVOQGXES/graph.json","events_json":"https://pith.science/api/pith-number/NELB2PSCO25RP37K6XFVOQGXES/events.json","paper":"https://pith.science/paper/NELB2PSC"},"agent_actions":{"view_html":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES","download_json":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES.json","view_paper":"https://pith.science/paper/NELB2PSC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.12730&json=true","fetch_graph":"https://pith.science/api/pith-number/NELB2PSCO25RP37K6XFVOQGXES/graph.json","fetch_events":"https://pith.science/api/pith-number/NELB2PSCO25RP37K6XFVOQGXES/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES/action/storage_attestation","attest_author":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES/action/author_attestation","sign_citation":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES/action/citation_signature","submit_replication":"https://pith.science/pith/NELB2PSCO25RP37K6XFVOQGXES/action/replication_record"}},"created_at":"2026-06-12T01:08:47.754232+00:00","updated_at":"2026-06-12T01:08:47.754232+00:00"}