{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:SQNMKGXM2LV7NTHVHUZBIOY5OO","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8f87499f968fbbc6c602eced5ea3412b1c075e7e51195e5c8b06eb9bc0d8049c","cross_cats_sorted":["cs.AI","cs.MA"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-09-17T17:13:19Z","title_canon_sha256":"c24d6d2df40f0c5e53a80e16e17056eaebaea7c3a1036105b6d5045d69c94783"},"schema_version":"1.0","source":{"id":"2409.11363","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2409.11363","created_at":"2026-06-24T00:14:20Z"},{"alias_kind":"arxiv_version","alias_value":"2409.11363v2","created_at":"2026-06-24T00:14:20Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2409.11363","created_at":"2026-06-24T00:14:20Z"},{"alias_kind":"pith_short_12","alias_value":"SQNMKGXM2LV7","created_at":"2026-06-24T00:14:20Z"},{"alias_kind":"pith_short_16","alias_value":"SQNMKGXM2LV7NTHV","created_at":"2026-06-24T00:14:20Z"},{"alias_kind":"pith_short_8","alias_value":"SQNMKGXM","created_at":"2026-06-24T00:14:20Z"}],"graph_snapshots":[{"event_id":"sha256:c98a497c428d7ff41501533ca454464eb92ef9a0f713bd02b4dec2a998cb4c17","target":"graph","created_at":"2026-06-24T00:14:20Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2409.11363/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"AI agents have the potential to aid users on a variety of consequential tasks, including conducting scientific research. To spur the development of useful agents, we need benchmarks that are challenging, but more crucially, directly correspond to real-world tasks of interest. This paper introduces such a benchmark, designed to measure the accuracy of AI agents in tackling a crucial yet surprisingly challenging aspect of scientific research: computational reproducibility. This task, fundamental to the scientific process, involves reproducing the results of a study using the provided code and da","authors_text":"Arvind Narayanan, Benedikt Stroebl, Nitya Nadgir, Sayash Kapoor, Zachary S. Siegel","cross_cats":["cs.AI","cs.MA"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-09-17T17:13:19Z","title":"CORE-Bench: Fostering the Credibility of Published Research Through a Computational Reproducibility Agent Benchmark"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2409.11363","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bc1253c83f69aa80b5d2b6c94d96081cdc2e43c64f6a108b1987f489b61da200","target":"record","created_at":"2026-06-24T00:14:20Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8f87499f968fbbc6c602eced5ea3412b1c075e7e51195e5c8b06eb9bc0d8049c","cross_cats_sorted":["cs.AI","cs.MA"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-09-17T17:13:19Z","title_canon_sha256":"c24d6d2df40f0c5e53a80e16e17056eaebaea7c3a1036105b6d5045d69c94783"},"schema_version":"1.0","source":{"id":"2409.11363","kind":"arxiv","version":2}},"canonical_sha256":"941ac51aecd2ebf6ccf53d32143b1d7384e081790fe7402faf036ba146c2387b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"941ac51aecd2ebf6ccf53d32143b1d7384e081790fe7402faf036ba146c2387b","first_computed_at":"2026-06-24T00:14:20.107923Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T00:14:20.107923Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8PuXi+QEytd1ZhbEV1Pr8d1CZ1UvYysqiNbyOw9ynCFFKXEZkvhdCxzzSFV1XOLrp4fJsGhqRBe5ld69OOxhAA==","signature_status":"signed_v1","signed_at":"2026-06-24T00:14:20.108486Z","signed_message":"canonical_sha256_bytes"},"source_id":"2409.11363","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bc1253c83f69aa80b5d2b6c94d96081cdc2e43c64f6a108b1987f489b61da200","sha256:c98a497c428d7ff41501533ca454464eb92ef9a0f713bd02b4dec2a998cb4c17"],"state_sha256":"b5de6d4738c03abc05ea8872c319a4e84470461b32336e1cf788ff19f4c89add"}