{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:A64RAU5ZNYD3WK6V5LYY55WIAY","short_pith_number":"pith:A64RAU5Z","schema_version":"1.0","canonical_sha256":"07b91053b96e07bb2bd5eaf18ef6c806302a5d2024279ba1c8aeec0df53241e6","source":{"kind":"arxiv","id":"1703.00426","version":1},"attestation_state":"computed","paper":{"title":"HolStep: A Machine Learning Dataset for Higher-order Logic Theorem Proving","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Cezary Kaliszyk, Christian Szegedy, Fran\\c{c}ois Chollet","submitted_at":"2017-03-01T18:20:19Z","abstract_excerpt":"Large computer-understandable proofs consist of millions of intermediate logical steps. The vast majority of such steps originate from manually selected and manually guided heuristics applied to intermediate goals. So far, machine learning has generally not been used to filter or generate these steps. In this paper, we introduce a new dataset based on Higher-Order Logic (HOL) proofs, for the purpose of developing new machine learning-based theorem-proving strategies. We make this dataset publicly available under the BSD license. We propose various machine learning tasks that can be performed o"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1703.00426","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-03-01T18:20:19Z","cross_cats_sorted":[],"title_canon_sha256":"1144a496971fab41fd7a490d1450e11e24b975ae7eaf16ccb52cac511655939e","abstract_canon_sha256":"a873395b1a5c40c689e3ebfe41736079b9a58cd13fe30e6686f6a8563f0d1f9b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:49:42.982411Z","signature_b64":"awQ4/12IeoqvE4rNa+tQzYjU1LbxR09+yLcVjNsa2X+oQ+l5EEk9+PyDd6qDEMdhaXFq/MXldnl1oLCg7T//Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"07b91053b96e07bb2bd5eaf18ef6c806302a5d2024279ba1c8aeec0df53241e6","last_reissued_at":"2026-05-18T00:49:42.981930Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:49:42.981930Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"HolStep: A Machine Learning Dataset for Higher-order Logic Theorem Proving","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Cezary Kaliszyk, Christian Szegedy, Fran\\c{c}ois Chollet","submitted_at":"2017-03-01T18:20:19Z","abstract_excerpt":"Large computer-understandable proofs consist of millions of intermediate logical steps. The vast majority of such steps originate from manually selected and manually guided heuristics applied to intermediate goals. So far, machine learning has generally not been used to filter or generate these steps. In this paper, we introduce a new dataset based on Higher-Order Logic (HOL) proofs, for the purpose of developing new machine learning-based theorem-proving strategies. We make this dataset publicly available under the BSD license. We propose various machine learning tasks that can be performed o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.00426","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1703.00426","created_at":"2026-05-18T00:49:42.982006+00:00"},{"alias_kind":"arxiv_version","alias_value":"1703.00426v1","created_at":"2026-05-18T00:49:42.982006+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.00426","created_at":"2026-05-18T00:49:42.982006+00:00"},{"alias_kind":"pith_short_12","alias_value":"A64RAU5ZNYD3","created_at":"2026-05-18T12:31:05.417338+00:00"},{"alias_kind":"pith_short_16","alias_value":"A64RAU5ZNYD3WK6V","created_at":"2026-05-18T12:31:05.417338+00:00"},{"alias_kind":"pith_short_8","alias_value":"A64RAU5Z","created_at":"2026-05-18T12:31:05.417338+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2605.09012","citing_title":"Re$^2$Math: Benchmarking Theorem Retrieval in Research-Level Mathematics","ref_index":11,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY","json":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY.json","graph_json":"https://pith.science/api/pith-number/A64RAU5ZNYD3WK6V5LYY55WIAY/graph.json","events_json":"https://pith.science/api/pith-number/A64RAU5ZNYD3WK6V5LYY55WIAY/events.json","paper":"https://pith.science/paper/A64RAU5Z"},"agent_actions":{"view_html":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY","download_json":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY.json","view_paper":"https://pith.science/paper/A64RAU5Z","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1703.00426&json=true","fetch_graph":"https://pith.science/api/pith-number/A64RAU5ZNYD3WK6V5LYY55WIAY/graph.json","fetch_events":"https://pith.science/api/pith-number/A64RAU5ZNYD3WK6V5LYY55WIAY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY/action/storage_attestation","attest_author":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY/action/author_attestation","sign_citation":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY/action/citation_signature","submit_replication":"https://pith.science/pith/A64RAU5ZNYD3WK6V5LYY55WIAY/action/replication_record"}},"created_at":"2026-05-18T00:49:42.982006+00:00","updated_at":"2026-05-18T00:49:42.982006+00:00"}