{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:UJ3M7YHSRY6R67QQ7ZMVTX25TZ","short_pith_number":"pith:UJ3M7YHS","schema_version":"1.0","canonical_sha256":"a276cfe0f28e3d1f7e10fe5959df5d9e7a23772de9a764e88a3ec2287a429ae9","source":{"kind":"arxiv","id":"2606.13670","version":1},"attestation_state":"computed","paper":{"title":"Automated reproducibility assessments in the social and behavioral sciences using large language models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Anna Steinberg Schulten, Bolei Ma, Felix Henninger, Frauke Kreuter, Markus Weinmann, Pietro Marcolongo, Sarah Ball, Stefan Feuerriegel, Stefan Rose, Tobias Holtdirk","submitted_at":"2026-06-11T17:58:36Z","abstract_excerpt":"Reproducibility in the social and behavioral sciences is typically evaluated by independent researchers who reanalyze the original data to assess whether the published findings can be recovered. However, such approaches are resource-intensive and difficult to scale. Here, we show that large language models (LLMs) can automate reproducibility assessments. Using N=76 published studies with predefined claims from the behavioral and social sciences, we compare LLM-generated analysis with the original findings and human reanalysis. For 7 studies, the LLM could not produce a viable effect size estim"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.13670","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-11T17:58:36Z","cross_cats_sorted":[],"title_canon_sha256":"56edebe4dbf5d22bf8ebed7ca09d2f0dcb895d8138082083a40544a57f4d1132","abstract_canon_sha256":"03ff7db30878b46ec5a87bbb4b02359c89d448df3843c2a6c3a5caf93de79f50"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:10:22.532953Z","signature_b64":"tzjmklBcQz7aalQ/D5wJMnkjUijZbeBTQoB0BNdnSjQrqz71i1wngh60P8XD49bZYBpTo52l5j4SvnvCR6I0Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a276cfe0f28e3d1f7e10fe5959df5d9e7a23772de9a764e88a3ec2287a429ae9","last_reissued_at":"2026-06-12T01:10:22.532098Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:10:22.532098Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Automated reproducibility assessments in the social and behavioral sciences using large language models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Anna Steinberg Schulten, Bolei Ma, Felix Henninger, Frauke Kreuter, Markus Weinmann, Pietro Marcolongo, Sarah Ball, Stefan Feuerriegel, Stefan Rose, Tobias Holtdirk","submitted_at":"2026-06-11T17:58:36Z","abstract_excerpt":"Reproducibility in the social and behavioral sciences is typically evaluated by independent researchers who reanalyze the original data to assess whether the published findings can be recovered. However, such approaches are resource-intensive and difficult to scale. Here, we show that large language models (LLMs) can automate reproducibility assessments. Using N=76 published studies with predefined claims from the behavioral and social sciences, we compare LLM-generated analysis with the original findings and human reanalysis. For 7 studies, the LLM could not produce a viable effect size estim"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.13670","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.13670/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.13670","created_at":"2026-06-12T01:10:22.532234+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.13670v1","created_at":"2026-06-12T01:10:22.532234+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.13670","created_at":"2026-06-12T01:10:22.532234+00:00"},{"alias_kind":"pith_short_12","alias_value":"UJ3M7YHSRY6R","created_at":"2026-06-12T01:10:22.532234+00:00"},{"alias_kind":"pith_short_16","alias_value":"UJ3M7YHSRY6R67QQ","created_at":"2026-06-12T01:10:22.532234+00:00"},{"alias_kind":"pith_short_8","alias_value":"UJ3M7YHS","created_at":"2026-06-12T01:10:22.532234+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ","json":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ.json","graph_json":"https://pith.science/api/pith-number/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/graph.json","events_json":"https://pith.science/api/pith-number/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/events.json","paper":"https://pith.science/paper/UJ3M7YHS"},"agent_actions":{"view_html":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ","download_json":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ.json","view_paper":"https://pith.science/paper/UJ3M7YHS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.13670&json=true","fetch_graph":"https://pith.science/api/pith-number/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/graph.json","fetch_events":"https://pith.science/api/pith-number/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/action/storage_attestation","attest_author":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/action/author_attestation","sign_citation":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/action/citation_signature","submit_replication":"https://pith.science/pith/UJ3M7YHSRY6R67QQ7ZMVTX25TZ/action/replication_record"}},"created_at":"2026-06-12T01:10:22.532234+00:00","updated_at":"2026-06-12T01:10:22.532234+00:00"}