{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:JUP2Q7U5OJMPVWDHDC4JXKLRS5","short_pith_number":"pith:JUP2Q7U5","canonical_record":{"source":{"id":"2505.11831","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-17T04:34:48Z","cross_cats_sorted":[],"title_canon_sha256":"597cec621d6cb1f3586f0bdb55926f5783a602101251a516d1835619aa5bbd88","abstract_canon_sha256":"c666d70bf85b4a5989194f51c6434f3e73d3397e9c1081c732e66dc8f0811ec8"},"schema_version":"1.0"},"canonical_sha256":"4d1fa87e9d7258fad86718b89ba97197530ba15f60f9eb57cc3f5466b683aef2","source":{"kind":"arxiv","id":"2505.11831","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.11831","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"arxiv_version","alias_value":"2505.11831v2","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.11831","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"pith_short_12","alias_value":"JUP2Q7U5OJMP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"JUP2Q7U5OJMPVWDH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"JUP2Q7U5","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:JUP2Q7U5OJMPVWDHDC4JXKLRS5","target":"record","payload":{"canonical_record":{"source":{"id":"2505.11831","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-17T04:34:48Z","cross_cats_sorted":[],"title_canon_sha256":"597cec621d6cb1f3586f0bdb55926f5783a602101251a516d1835619aa5bbd88","abstract_canon_sha256":"c666d70bf85b4a5989194f51c6434f3e73d3397e9c1081c732e66dc8f0811ec8"},"schema_version":"1.0"},"canonical_sha256":"4d1fa87e9d7258fad86718b89ba97197530ba15f60f9eb57cc3f5466b683aef2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:50.833892Z","signature_b64":"Xz6mDPujL5SHXXWAz7DKkOMGR4OBN4TBb9Dx3im1aM2iIJ0cAMYNgmH4Uu3LxXIPZXciUVGTaK1FN8i7j7YaBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4d1fa87e9d7258fad86718b89ba97197530ba15f60f9eb57cc3f5466b683aef2","last_reissued_at":"2026-05-17T23:38:50.833466Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:50.833466Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2505.11831","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pGhBgrGkUsPikhahNlyuW6wF44QTQu2QmMv/iyLqvmQLkIb+oi3kdtO9Ev8cU0CXbTCYIBcFTjf/ZIFF9Bj/Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T04:04:58.575625Z"},"content_sha256":"052ceb6723d4d9fba484b61dbe4aaaa281fbf85e9e5b77369ef7df1dd290d734","schema_version":"1.0","event_id":"sha256:052ceb6723d4d9fba484b61dbe4aaaa281fbf85e9e5b77369ef7df1dd290d734"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:JUP2Q7U5OJMPVWDHDC4JXKLRS5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ARC-AGI-2: A New Challenge for Frontier AI Reasoning Systems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Bryan Landers, Francois Chollet, Gregory Kamradt, Henry Pinkard, Mike Knoop","submitted_at":"2025-05-17T04:34:48Z","abstract_excerpt":"The Abstraction and Reasoning Corpus for Artificial General Intelligence (ARC-AGI), introduced in 2019, established a challenging benchmark for evaluating the general fluid intelligence of artificial systems via a set of unique, novel tasks only requiring minimal prior knowledge. While ARC-AGI has spurred significant research activity over the past five years, recent AI progress calls for benchmarks capable of finer-grained evaluation at higher levels of cognitive complexity. We introduce ARC-AGI-2, an upgraded version of the benchmark. ARC-AGI-2 preserves the input-output pair task format of "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"ARC-AGI-2 incorporates a newly curated and expanded set of tasks specifically designed to provide a more granular signal to assess abstract reasoning and problem-solving abilities at higher levels of fluid intelligence.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The newly selected tasks genuinely require higher levels of fluid intelligence with only minimal prior knowledge, and the human testing protocol produces a reliable and representative baseline.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"ARC-AGI-2 adds a larger, more complex set of tasks to the original ARC-AGI benchmark to give finer-grained measurement of fluid intelligence in AI.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a67d66da04baded035b7fa20132dcde441bbfb28ea7a45f96eb6681492283edd"},"source":{"id":"2505.11831","kind":"arxiv","version":2},"verdict":{"id":"51689693-5cd4-4e9c-9783-4e24d71b017c","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T16:45:48.667064Z","strongest_claim":"ARC-AGI-2 incorporates a newly curated and expanded set of tasks specifically designed to provide a more granular signal to assess abstract reasoning and problem-solving abilities at higher levels of fluid intelligence.","one_line_summary":"ARC-AGI-2 adds a larger, more complex set of tasks to the original ARC-AGI benchmark to give finer-grained measurement of fluid intelligence in AI.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The newly selected tasks genuinely require higher levels of fluid intelligence with only minimal prior knowledge, and the human testing protocol produces a reliable and representative baseline.","pith_extraction_headline":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems."},"references":{"count":13,"sample":[{"doi":"","year":null,"title":"ARC Prize - Leaderboard.https://arcprize.org/leaderboard","work_id":"554fed08-e4ae-4231-bd02-0429d9a3a2dd","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"ARC Prize - Policy.https://arcprize.org/policy","work_id":"6db1d41d-6cb8-4ebf-83ed-b6446c01cb39","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"Kaggle competition","work_id":"f2b15558-bb48-42b5-8df2-e1a2b24d0dc0","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Lab42 competi- tion","work_id":"ff6dbefe-0344-49fd-993a-56618353547c","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Lab42 competi- tion","work_id":"612eb557-b344-4e61-ba1a-a7cfb956ca8e","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":13,"snapshot_sha256":"26c8c782e89c6f0fab091170754f0008a278e39827819865d1f325adefa0e17a","internal_anchors":0},"formal_canon":{"evidence_count":3,"snapshot_sha256":"54f0f11ce90d1acbf158e729524215e13365c12af772479d07a4466ff1fc4f06"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"51689693-5cd4-4e9c-9783-4e24d71b017c"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Kuksnyfv0P7kDtNi8VLM/HthgVg+tfOm66zKQxtHxX0FucW856A5ByrlS6O2jbwSdJTtLNhGSL4apkAsojpKDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T04:04:58.576990Z"},"content_sha256":"e1fc03e63086257a04af330303e426cb56cf6588ff541e2ef6ab7f8c64076647","schema_version":"1.0","event_id":"sha256:e1fc03e63086257a04af330303e426cb56cf6588ff541e2ef6ab7f8c64076647"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/JUP2Q7U5OJMPVWDHDC4JXKLRS5/bundle.json","state_url":"https://pith.science/pith/JUP2Q7U5OJMPVWDHDC4JXKLRS5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/JUP2Q7U5OJMPVWDHDC4JXKLRS5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T04:04:58Z","links":{"resolver":"https://pith.science/pith/JUP2Q7U5OJMPVWDHDC4JXKLRS5","bundle":"https://pith.science/pith/JUP2Q7U5OJMPVWDHDC4JXKLRS5/bundle.json","state":"https://pith.science/pith/JUP2Q7U5OJMPVWDHDC4JXKLRS5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/JUP2Q7U5OJMPVWDHDC4JXKLRS5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:JUP2Q7U5OJMPVWDHDC4JXKLRS5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c666d70bf85b4a5989194f51c6434f3e73d3397e9c1081c732e66dc8f0811ec8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-17T04:34:48Z","title_canon_sha256":"597cec621d6cb1f3586f0bdb55926f5783a602101251a516d1835619aa5bbd88"},"schema_version":"1.0","source":{"id":"2505.11831","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.11831","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"arxiv_version","alias_value":"2505.11831v2","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.11831","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"pith_short_12","alias_value":"JUP2Q7U5OJMP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"JUP2Q7U5OJMPVWDH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"JUP2Q7U5","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:e1fc03e63086257a04af330303e426cb56cf6588ff541e2ef6ab7f8c64076647","target":"graph","created_at":"2026-05-17T23:38:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"ARC-AGI-2 incorporates a newly curated and expanded set of tasks specifically designed to provide a more granular signal to assess abstract reasoning and problem-solving abilities at higher levels of fluid intelligence."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The newly selected tasks genuinely require higher levels of fluid intelligence with only minimal prior knowledge, and the human testing protocol produces a reliable and representative baseline."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ARC-AGI-2 adds a larger, more complex set of tasks to the original ARC-AGI benchmark to give finer-grained measurement of fluid intelligence in AI."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems."}],"snapshot_sha256":"a67d66da04baded035b7fa20132dcde441bbfb28ea7a45f96eb6681492283edd"},"formal_canon":{"evidence_count":3,"snapshot_sha256":"54f0f11ce90d1acbf158e729524215e13365c12af772479d07a4466ff1fc4f06"},"paper":{"abstract_excerpt":"The Abstraction and Reasoning Corpus for Artificial General Intelligence (ARC-AGI), introduced in 2019, established a challenging benchmark for evaluating the general fluid intelligence of artificial systems via a set of unique, novel tasks only requiring minimal prior knowledge. While ARC-AGI has spurred significant research activity over the past five years, recent AI progress calls for benchmarks capable of finer-grained evaluation at higher levels of cognitive complexity. We introduce ARC-AGI-2, an upgraded version of the benchmark. ARC-AGI-2 preserves the input-output pair task format of ","authors_text":"Bryan Landers, Francois Chollet, Gregory Kamradt, Henry Pinkard, Mike Knoop","cross_cats":[],"headline":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-17T04:34:48Z","title":"ARC-AGI-2: A New Challenge for Frontier AI Reasoning Systems"},"references":{"count":13,"internal_anchors":0,"resolved_work":13,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"ARC Prize - Leaderboard.https://arcprize.org/leaderboard","work_id":"554fed08-e4ae-4231-bd02-0429d9a3a2dd","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"ARC Prize - Policy.https://arcprize.org/policy","work_id":"6db1d41d-6cb8-4ebf-83ed-b6446c01cb39","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Kaggle competition","work_id":"f2b15558-bb48-42b5-8df2-e1a2b24d0dc0","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Lab42 competi- tion","work_id":"ff6dbefe-0344-49fd-993a-56618353547c","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Lab42 competi- tion","work_id":"612eb557-b344-4e61-ba1a-a7cfb956ca8e","year":2023}],"snapshot_sha256":"26c8c782e89c6f0fab091170754f0008a278e39827819865d1f325adefa0e17a"},"source":{"id":"2505.11831","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T16:45:48.667064Z","id":"51689693-5cd4-4e9c-9783-4e24d71b017c","model_set":{"reader":"grok-4.3"},"one_line_summary":"ARC-AGI-2 adds a larger, more complex set of tasks to the original ARC-AGI benchmark to give finer-grained measurement of fluid intelligence in AI.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"ARC-AGI-2 introduces an expanded set of tasks to evaluate higher levels of abstract reasoning in AI systems.","strongest_claim":"ARC-AGI-2 incorporates a newly curated and expanded set of tasks specifically designed to provide a more granular signal to assess abstract reasoning and problem-solving abilities at higher levels of fluid intelligence.","weakest_assumption":"The newly selected tasks genuinely require higher levels of fluid intelligence with only minimal prior knowledge, and the human testing protocol produces a reliable and representative baseline."}},"verdict_id":"51689693-5cd4-4e9c-9783-4e24d71b017c"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:052ceb6723d4d9fba484b61dbe4aaaa281fbf85e9e5b77369ef7df1dd290d734","target":"record","created_at":"2026-05-17T23:38:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c666d70bf85b4a5989194f51c6434f3e73d3397e9c1081c732e66dc8f0811ec8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-17T04:34:48Z","title_canon_sha256":"597cec621d6cb1f3586f0bdb55926f5783a602101251a516d1835619aa5bbd88"},"schema_version":"1.0","source":{"id":"2505.11831","kind":"arxiv","version":2}},"canonical_sha256":"4d1fa87e9d7258fad86718b89ba97197530ba15f60f9eb57cc3f5466b683aef2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4d1fa87e9d7258fad86718b89ba97197530ba15f60f9eb57cc3f5466b683aef2","first_computed_at":"2026-05-17T23:38:50.833466Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:50.833466Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Xz6mDPujL5SHXXWAz7DKkOMGR4OBN4TBb9Dx3im1aM2iIJ0cAMYNgmH4Uu3LxXIPZXciUVGTaK1FN8i7j7YaBA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:50.833892Z","signed_message":"canonical_sha256_bytes"},"source_id":"2505.11831","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:052ceb6723d4d9fba484b61dbe4aaaa281fbf85e9e5b77369ef7df1dd290d734","sha256:e1fc03e63086257a04af330303e426cb56cf6588ff541e2ef6ab7f8c64076647"],"state_sha256":"f6a9511c1bbe7fbebc4bdda1de2039974d8ccd1de791389985b8e14be3cff8fb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/2WF4XmdnAJQ0PPffFKf7pcB36aFClaiJMn+PI0P+nqF5OQL+6eofaSkABH5vUWrqt3CUBgb2Ybq4IHwIh5GDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T04:04:58.582169Z","bundle_sha256":"9178b37189880c07f5df21f81effe6d6fc36706b9446a825f1a1cd869eff7b4b"}}