{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:DWZHFPVTZ5C5KOHZYSEMJPZUL2","short_pith_number":"pith:DWZHFPVT","canonical_record":{"source":{"id":"2606.24530","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T12:58:23Z","cross_cats_sorted":[],"title_canon_sha256":"541d99b8d20eda738d8910ea828507467c363f7fbe99dfc6326810a37fdf0390","abstract_canon_sha256":"1001450d7e88557576347ad6941101a9330ad6557c650ee8c9316f239b3afb90"},"schema_version":"1.0"},"canonical_sha256":"1db272beb3cf45d538f9c488c4bf345e94a0789173f6e78c3f3c830826fffa44","source":{"kind":"arxiv","id":"2606.24530","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24530","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24530v1","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24530","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_12","alias_value":"DWZHFPVTZ5C5","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_16","alias_value":"DWZHFPVTZ5C5KOHZ","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_8","alias_value":"DWZHFPVT","created_at":"2026-06-24T01:15:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:DWZHFPVTZ5C5KOHZYSEMJPZUL2","target":"record","payload":{"canonical_record":{"source":{"id":"2606.24530","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T12:58:23Z","cross_cats_sorted":[],"title_canon_sha256":"541d99b8d20eda738d8910ea828507467c363f7fbe99dfc6326810a37fdf0390","abstract_canon_sha256":"1001450d7e88557576347ad6941101a9330ad6557c650ee8c9316f239b3afb90"},"schema_version":"1.0"},"canonical_sha256":"1db272beb3cf45d538f9c488c4bf345e94a0789173f6e78c3f3c830826fffa44","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:15:33.146254Z","signature_b64":"YtBkR+8nWOJk4hy9ymhXFy+u7C+7GMuZoS6yT+iTlPYfAmIusIQyxAaNiyIpj9gks3NfFlfn717JvwwitvGzCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1db272beb3cf45d538f9c488c4bf345e94a0789173f6e78c3f3c830826fffa44","last_reissued_at":"2026-06-24T01:15:33.145898Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:15:33.145898Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.24530","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"v8vDQq5y49eQ9BKW95ZNJ22jY5d95XmeTLGvuBQ6eECmvjFGrAMtcheEYrmXSk9rtRbwCPYPBDvNw+k0P7kdDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-26T05:02:17.167955Z"},"content_sha256":"781ebe1e1d4deb8658d539a5a2c94fdf8b0d7c60b578195033d3f1a653d83005","schema_version":"1.0","event_id":"sha256:781ebe1e1d4deb8658d539a5a2c94fdf8b0d7c60b578195033d3f1a653d83005"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:DWZHFPVTZ5C5KOHZYSEMJPZUL2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"NatureBench: Can Coding Agents Match the Published SOTA of Nature-Family Papers?","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bingxiang He, Bowen Zhou, Che Jiang, Jincheng Zhong, Junlin Yang, Kaikai Zhao, Kai Tian, Kaiyan Zhang, Lejun Cheng, Ning Ding, Sihang Zeng, Weifeng Huang, Weizhi Wang, Yuchong Wang, Yuru Wang, Yuxin Zuo, Zhenzhao Yuan","submitted_at":"2026-06-23T12:58:23Z","abstract_excerpt":"We introduce NatureBench, a cross-discipline benchmark of 90 tasks distilled from peer-reviewed Nature-family publications, designed to evaluate whether AI coding agents can move beyond reproduction toward discovery on real scientific problems. NatureBench is built on NatureGym, an automated pipeline that constructs a standardized, per-task containerized environment from a source paper, addressing the environment-fragmentation problem that has limited the credibility of prior agent-on-research benchmarks. Evaluating ten frontier agent configurations under a strict web-search-disabled protocol,"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24530","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.24530/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Oram9SU8Xo/HZnzAShGAWcWtYI1S+SUMABxxtxPJxyEhpovzGbGa28FGlmn6GanaVSRGBlJE4YuDo9iW289fBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-26T05:02:17.168352Z"},"content_sha256":"94a96b5c4cde26779afc8ff549d7f9de2f5a34353e36a87efa6d7508f705faff","schema_version":"1.0","event_id":"sha256:94a96b5c4cde26779afc8ff549d7f9de2f5a34353e36a87efa6d7508f705faff"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DWZHFPVTZ5C5KOHZYSEMJPZUL2/bundle.json","state_url":"https://pith.science/pith/DWZHFPVTZ5C5KOHZYSEMJPZUL2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DWZHFPVTZ5C5KOHZYSEMJPZUL2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-26T05:02:17Z","links":{"resolver":"https://pith.science/pith/DWZHFPVTZ5C5KOHZYSEMJPZUL2","bundle":"https://pith.science/pith/DWZHFPVTZ5C5KOHZYSEMJPZUL2/bundle.json","state":"https://pith.science/pith/DWZHFPVTZ5C5KOHZYSEMJPZUL2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DWZHFPVTZ5C5KOHZYSEMJPZUL2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DWZHFPVTZ5C5KOHZYSEMJPZUL2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1001450d7e88557576347ad6941101a9330ad6557c650ee8c9316f239b3afb90","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T12:58:23Z","title_canon_sha256":"541d99b8d20eda738d8910ea828507467c363f7fbe99dfc6326810a37fdf0390"},"schema_version":"1.0","source":{"id":"2606.24530","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24530","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24530v1","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24530","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_12","alias_value":"DWZHFPVTZ5C5","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_16","alias_value":"DWZHFPVTZ5C5KOHZ","created_at":"2026-06-24T01:15:33Z"},{"alias_kind":"pith_short_8","alias_value":"DWZHFPVT","created_at":"2026-06-24T01:15:33Z"}],"graph_snapshots":[{"event_id":"sha256:94a96b5c4cde26779afc8ff549d7f9de2f5a34353e36a87efa6d7508f705faff","target":"graph","created_at":"2026-06-24T01:15:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.24530/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We introduce NatureBench, a cross-discipline benchmark of 90 tasks distilled from peer-reviewed Nature-family publications, designed to evaluate whether AI coding agents can move beyond reproduction toward discovery on real scientific problems. NatureBench is built on NatureGym, an automated pipeline that constructs a standardized, per-task containerized environment from a source paper, addressing the environment-fragmentation problem that has limited the credibility of prior agent-on-research benchmarks. Evaluating ten frontier agent configurations under a strict web-search-disabled protocol,","authors_text":"Bingxiang He, Bowen Zhou, Che Jiang, Jincheng Zhong, Junlin Yang, Kaikai Zhao, Kai Tian, Kaiyan Zhang, Lejun Cheng, Ning Ding, Sihang Zeng, Weifeng Huang, Weizhi Wang, Yuchong Wang, Yuru Wang, Yuxin Zuo, Zhenzhao Yuan","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T12:58:23Z","title":"NatureBench: Can Coding Agents Match the Published SOTA of Nature-Family Papers?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24530","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:781ebe1e1d4deb8658d539a5a2c94fdf8b0d7c60b578195033d3f1a653d83005","target":"record","created_at":"2026-06-24T01:15:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1001450d7e88557576347ad6941101a9330ad6557c650ee8c9316f239b3afb90","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T12:58:23Z","title_canon_sha256":"541d99b8d20eda738d8910ea828507467c363f7fbe99dfc6326810a37fdf0390"},"schema_version":"1.0","source":{"id":"2606.24530","kind":"arxiv","version":1}},"canonical_sha256":"1db272beb3cf45d538f9c488c4bf345e94a0789173f6e78c3f3c830826fffa44","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1db272beb3cf45d538f9c488c4bf345e94a0789173f6e78c3f3c830826fffa44","first_computed_at":"2026-06-24T01:15:33.145898Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T01:15:33.145898Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"YtBkR+8nWOJk4hy9ymhXFy+u7C+7GMuZoS6yT+iTlPYfAmIusIQyxAaNiyIpj9gks3NfFlfn717JvwwitvGzCQ==","signature_status":"signed_v1","signed_at":"2026-06-24T01:15:33.146254Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.24530","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:781ebe1e1d4deb8658d539a5a2c94fdf8b0d7c60b578195033d3f1a653d83005","sha256:94a96b5c4cde26779afc8ff549d7f9de2f5a34353e36a87efa6d7508f705faff"],"state_sha256":"57d126ebcb7220685ab1a20563f63077ee4587833d41dab3cc76b1d6ba89b63e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"waFHpkzqBViBpW37qOGiSNxtduRciVtOlpVp8r1jSKMd76gFJBcmvirCDomVrqZ5nqvhTBaMQy72CiIgnju2BQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-26T05:02:17.171365Z","bundle_sha256":"026fc0c4bb949353ef625e5bf9367ae8e6aa2266f0ee74b9a86331295ba1d73a"}}