{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:PJCCLG6B373RNQ4562PEN3RJ5H","short_pith_number":"pith:PJCCLG6B","schema_version":"1.0","canonical_sha256":"7a44259bc1dff716c39df69e46ee29e9f48fed688cfcc405792c16c7e0d6bd53","source":{"kind":"arxiv","id":"2606.10799","version":1},"attestation_state":"computed","paper":{"title":"Evaluating Research-Level Math Proofs via Strict Step-Level Verification","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Yifeng Sun","submitted_at":"2026-06-09T12:46:35Z","abstract_excerpt":"Large Language Models (LLMs) struggle to rigorously verify complex mathematical proofs. Standard global evaluation approaches suffer from \"context poisoning,\" in which superficially plausible statements mask subtle logical flaws, leading to hallucination or over-skepticism. To address this, we shift from global evaluation to strict step-level verification: our framework maintains detailed context for each deduction step and strictly constrains the sources of applied theorems. We evaluate on a carefully curated adversarial diagnostic suite of research-level proofs drawn from the FirstProof chal"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.10799","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-09T12:46:35Z","cross_cats_sorted":[],"title_canon_sha256":"367ce6a0a5dcad459c07c59b9560f20ba08b4271a14616279f973f281c84b25c","abstract_canon_sha256":"064fc10284ceb5396ab9f9a6a1489f3fa67342db15d3762e070a3458e78b525d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:10:41.166084Z","signature_b64":"qStLAGz/+gTWQOL4Axf/7FfXpMWZBSPmVwshjBzqyjTMidwo9I4I+c1hav5zUQ4PYPfMOnzv86lz7/bcSGzlDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7a44259bc1dff716c39df69e46ee29e9f48fed688cfcc405792c16c7e0d6bd53","last_reissued_at":"2026-06-10T01:10:41.165626Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:10:41.165626Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Evaluating Research-Level Math Proofs via Strict Step-Level Verification","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Yifeng Sun","submitted_at":"2026-06-09T12:46:35Z","abstract_excerpt":"Large Language Models (LLMs) struggle to rigorously verify complex mathematical proofs. Standard global evaluation approaches suffer from \"context poisoning,\" in which superficially plausible statements mask subtle logical flaws, leading to hallucination or over-skepticism. To address this, we shift from global evaluation to strict step-level verification: our framework maintains detailed context for each deduction step and strictly constrains the sources of applied theorems. We evaluate on a carefully curated adversarial diagnostic suite of research-level proofs drawn from the FirstProof chal"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.10799","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.10799/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.10799","created_at":"2026-06-10T01:10:41.165688+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.10799v1","created_at":"2026-06-10T01:10:41.165688+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.10799","created_at":"2026-06-10T01:10:41.165688+00:00"},{"alias_kind":"pith_short_12","alias_value":"PJCCLG6B373R","created_at":"2026-06-10T01:10:41.165688+00:00"},{"alias_kind":"pith_short_16","alias_value":"PJCCLG6B373RNQ45","created_at":"2026-06-10T01:10:41.165688+00:00"},{"alias_kind":"pith_short_8","alias_value":"PJCCLG6B","created_at":"2026-06-10T01:10:41.165688+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H","json":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H.json","graph_json":"https://pith.science/api/pith-number/PJCCLG6B373RNQ4562PEN3RJ5H/graph.json","events_json":"https://pith.science/api/pith-number/PJCCLG6B373RNQ4562PEN3RJ5H/events.json","paper":"https://pith.science/paper/PJCCLG6B"},"agent_actions":{"view_html":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H","download_json":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H.json","view_paper":"https://pith.science/paper/PJCCLG6B","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.10799&json=true","fetch_graph":"https://pith.science/api/pith-number/PJCCLG6B373RNQ4562PEN3RJ5H/graph.json","fetch_events":"https://pith.science/api/pith-number/PJCCLG6B373RNQ4562PEN3RJ5H/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H/action/storage_attestation","attest_author":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H/action/author_attestation","sign_citation":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H/action/citation_signature","submit_replication":"https://pith.science/pith/PJCCLG6B373RNQ4562PEN3RJ5H/action/replication_record"}},"created_at":"2026-06-10T01:10:41.165688+00:00","updated_at":"2026-06-10T01:10:41.165688+00:00"}