{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DWZHFPVTZ5C5KOHZYSEMJPZUL2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1001450d7e88557576347ad6941101a9330ad6557c650ee8c9316f239b3afb90","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T12:58:23Z","title_canon_sha256":"541d99b8d20eda738d8910ea828507467c363f7fbe99dfc6326810a37fdf0390"},"schema_version":"1.0","source":{"id":"2606.24530","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24530","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24530v1","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24530","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_12","alias_value":"DWZHFPVTZ5C5","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_16","alias_value":"DWZHFPVTZ5C5KOHZ","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_8","alias_value":"DWZHFPVT","created_at":"2026-06-24T01:15:33Z"}],"graph_snapshots":[{"event_id":"sha256:94a96b5c4cde26779afc8ff549d7f9de2f5a34353e36a87efa6d7508f705faff","target":"graph","created_at":"2026-06-24T01:15:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.24530/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We introduce NatureBench, a cross-discipline benchmark of 90 tasks distilled from peer-reviewed Nature-family publications, designed to evaluate whether AI coding agents can move beyond reproduction toward discovery on real scientific problems. NatureBench is built on NatureGym, an automated pipeline that constructs a standardized, per-task containerized environment from a source paper, addressing the environment-fragmentation problem that has limited the credibility of prior agent-on-research benchmarks. Evaluating ten frontier agent configurations under a strict web-search-disabled protocol,","authors_text":"Bingxiang He, Bowen Zhou, Che Jiang, Jincheng Zhong, Junlin Yang, Kaikai Zhao, Kai Tian, Kaiyan Zhang, Lejun Cheng, Ning Ding, Sihang Zeng, Weifeng Huang, Weizhi Wang, Yuchong Wang, Yuru Wang, Yuxin Zuo, Zhenzhao Yuan","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T12:58:23Z","title":"NatureBench: Can Coding Agents Match the Published SOTA of Nature-Family Papers?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24530","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:781ebe1e1d4deb8658d539a5a2c94fdf8b0d7c60b578195033d3f1a653d83005","target":"record","created_at":"2026-06-24T01:15:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1001450d7e88557576347ad6941101a9330ad6557c650ee8c9316f239b3afb90","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T12:58:23Z","title_canon_sha256":"541d99b8d20eda738d8910ea828507467c363f7fbe99dfc6326810a37fdf0390"},"schema_version":"1.0","source":{"id":"2606.24530","kind":"arxiv","version":1}},"canonical_sha256":"1db272beb3cf45d538f9c488c4bf345e94a0789173f6e78c3f3c830826fffa44","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1db272beb3cf45d538f9c488c4bf345e94a0789173f6e78c3f3c830826fffa44","first_computed_at":"2026-06-24T01:15:33.145898Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T01:15:33.145898Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"YtBkR+8nWOJk4hy9ymhXFy+u7C+7GMuZoS6yT+iTlPYfAmIusIQyxAaNiyIpj9gks3NfFlfn717JvwwitvGzCQ==","signature_status":"signed_v1","signed_at":"2026-06-24T01:15:33.146254Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.24530","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:781ebe1e1d4deb8658d539a5a2c94fdf8b0d7c60b578195033d3f1a653d83005","sha256:94a96b5c4cde26779afc8ff549d7f9de2f5a34353e36a87efa6d7508f705faff"],"state_sha256":"57d126ebcb7220685ab1a20563f63077ee4587833d41dab3cc76b1d6ba89b63e"}