{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:C6BFU545DD7RW6PMM2VTJX7B2C","short_pith_number":"pith:C6BFU545","canonical_record":{"source":{"id":"2510.04374","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-05T21:36:43Z","cross_cats_sorted":["cs.AI","cs.CY"],"title_canon_sha256":"98ad289b766a236ffb8e8459c62ec63bee942b19ec85473290763a0fb9ed4d41","abstract_canon_sha256":"3e907bdaf254be3ce564273e978e1bc5f5a1f12490ec69fc10eedd5df91fda4c"},"schema_version":"1.0"},"canonical_sha256":"17825a779d18ff1b79ec66ab34dfe1d08698c6913c9055352154a735876fb74c","source":{"kind":"arxiv","id":"2510.04374","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.04374","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"arxiv_version","alias_value":"2510.04374v1","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.04374","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"pith_short_12","alias_value":"C6BFU545DD7R","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"C6BFU545DD7RW6PM","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"C6BFU545","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:C6BFU545DD7RW6PMM2VTJX7B2C","target":"record","payload":{"canonical_record":{"source":{"id":"2510.04374","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-05T21:36:43Z","cross_cats_sorted":["cs.AI","cs.CY"],"title_canon_sha256":"98ad289b766a236ffb8e8459c62ec63bee942b19ec85473290763a0fb9ed4d41","abstract_canon_sha256":"3e907bdaf254be3ce564273e978e1bc5f5a1f12490ec69fc10eedd5df91fda4c"},"schema_version":"1.0"},"canonical_sha256":"17825a779d18ff1b79ec66ab34dfe1d08698c6913c9055352154a735876fb74c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:48.067227Z","signature_b64":"VVtcqc3AHiINtBCNIBbTfRE+Iutnyl/lnATZXqqKKpku2UVVcqmz9m6ZYiP3f4j3BYlUsDj0nHO0BE2KHtpgCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"17825a779d18ff1b79ec66ab34dfe1d08698c6913c9055352154a735876fb74c","last_reissued_at":"2026-05-17T23:38:48.066776Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:48.066776Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.04374","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2Jr85AFFJitGjwS6buisK1ZzJayB4K1g0KmhUz767LRJDyylOOGMdpoE7TmWA/rVl7xR2bPRKbXOB29XlPWQCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T21:30:49.077208Z"},"content_sha256":"32defff5fda8cb4f953f1644cc952ecdef0a2c74168ec099297cb3a3f0b447a8","schema_version":"1.0","event_id":"sha256:32defff5fda8cb4f953f1644cc952ecdef0a2c74168ec099297cb3a3f0b447a8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:C6BFU545DD7RW6PMM2VTJX7B2C","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"GDPval: Evaluating AI Model Performance on Real-World Economically Valuable Tasks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Frontier AI models approach industry experts in quality on real-world economically valuable tasks.","cross_cats":["cs.AI","cs.CY"],"primary_cat":"cs.LG","authors_text":"Alexandra Barr, Amelia Glaese, David Li, Elizabeth Proehl, Gildas Chabot, Grace Kim, Jerry Tworek, Laurance Fauconnet, Marwan Aljubeh, Michael Sharman, Michele Wang, Natalie S. Kim, Olivia Watkins, Patrick Chao, Phoebe Thacker, Rachel Dias, Samuel Miserendino, Sim\\'on Posada Fishman, Tejal Patwardhan","submitted_at":"2025-10-05T21:36:43Z","abstract_excerpt":"We introduce GDPval, a benchmark evaluating AI model capabilities on real-world economically valuable tasks. GDPval covers the majority of U.S. Bureau of Labor Statistics Work Activities for 44 occupations across the top 9 sectors contributing to U.S. GDP (Gross Domestic Product). Tasks are constructed from the representative work of industry professionals with an average of 14 years of experience. We find that frontier model performance on GDPval is improving roughly linearly over time, and that the current best frontier models are approaching industry experts in deliverable quality. We analy"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"frontier model performance on GDPval is improving roughly linearly over time, and that the current best frontier models are approaching industry experts in deliverable quality","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"that the selected tasks and expert ratings accurately represent the full range of economically valuable work and that automated grading reliably matches human expert judgment on deliverable quality","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"GDPval benchmark finds frontier AI models approaching industry experts on economically valuable tasks from high-GDP sectors, with linear performance gains over time.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Frontier AI models approach industry experts in quality on real-world economically valuable tasks.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"598e2d628e61077b67f37794ca9742af834e58c489832ae066cfb08fa0c5dec1"},"source":{"id":"2510.04374","kind":"arxiv","version":1},"verdict":{"id":"774665a5-7f98-469d-87bc-f852f2954c87","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T11:08:12.153746Z","strongest_claim":"frontier model performance on GDPval is improving roughly linearly over time, and that the current best frontier models are approaching industry experts in deliverable quality","one_line_summary":"GDPval benchmark finds frontier AI models approaching industry experts on economically valuable tasks from high-GDP sectors, with linear performance gains over time.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"that the selected tasks and expert ratings accurately represent the full range of economically valuable work and that automated grading reliably matches human expert judgment on deliverable quality","pith_extraction_headline":"Frontier AI models approach industry experts in quality on real-world economically valuable tasks."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":3,"snapshot_sha256":"06fa6c1ba32e45605f782d631000b95db535fcc5c0770c0f5eb581c4bf9ea9ff"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"774665a5-7f98-469d-87bc-f852f2954c87"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Y86LUEyP0o5UI/QauyhS/vpw/0ViOpNsbaBEYDGH7yNROuYe2qZRG1vzEc5nBpwoEeMUltcYvkgJKY5rwcSvBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T21:30:49.077671Z"},"content_sha256":"23a2ffb68438c481dd30f5c99717f486e284da7d62fb8e98d906f4c3f23923e1","schema_version":"1.0","event_id":"sha256:23a2ffb68438c481dd30f5c99717f486e284da7d62fb8e98d906f4c3f23923e1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/C6BFU545DD7RW6PMM2VTJX7B2C/bundle.json","state_url":"https://pith.science/pith/C6BFU545DD7RW6PMM2VTJX7B2C/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/C6BFU545DD7RW6PMM2VTJX7B2C/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T21:30:49Z","links":{"resolver":"https://pith.science/pith/C6BFU545DD7RW6PMM2VTJX7B2C","bundle":"https://pith.science/pith/C6BFU545DD7RW6PMM2VTJX7B2C/bundle.json","state":"https://pith.science/pith/C6BFU545DD7RW6PMM2VTJX7B2C/state.json","well_known_bundle":"https://pith.science/.well-known/pith/C6BFU545DD7RW6PMM2VTJX7B2C/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:C6BFU545DD7RW6PMM2VTJX7B2C","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3e907bdaf254be3ce564273e978e1bc5f5a1f12490ec69fc10eedd5df91fda4c","cross_cats_sorted":["cs.AI","cs.CY"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-05T21:36:43Z","title_canon_sha256":"98ad289b766a236ffb8e8459c62ec63bee942b19ec85473290763a0fb9ed4d41"},"schema_version":"1.0","source":{"id":"2510.04374","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.04374","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"arxiv_version","alias_value":"2510.04374v1","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.04374","created_at":"2026-05-17T23:38:48Z"},{"alias_kind":"pith_short_12","alias_value":"C6BFU545DD7R","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"C6BFU545DD7RW6PM","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"C6BFU545","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:23a2ffb68438c481dd30f5c99717f486e284da7d62fb8e98d906f4c3f23923e1","target":"graph","created_at":"2026-05-17T23:38:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"frontier model performance on GDPval is improving roughly linearly over time, and that the current best frontier models are approaching industry experts in deliverable quality"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"that the selected tasks and expert ratings accurately represent the full range of economically valuable work and that automated grading reliably matches human expert judgment on deliverable quality"},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"GDPval benchmark finds frontier AI models approaching industry experts on economically valuable tasks from high-GDP sectors, with linear performance gains over time."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Frontier AI models approach industry experts in quality on real-world economically valuable tasks."}],"snapshot_sha256":"598e2d628e61077b67f37794ca9742af834e58c489832ae066cfb08fa0c5dec1"},"formal_canon":{"evidence_count":3,"snapshot_sha256":"06fa6c1ba32e45605f782d631000b95db535fcc5c0770c0f5eb581c4bf9ea9ff"},"paper":{"abstract_excerpt":"We introduce GDPval, a benchmark evaluating AI model capabilities on real-world economically valuable tasks. GDPval covers the majority of U.S. Bureau of Labor Statistics Work Activities for 44 occupations across the top 9 sectors contributing to U.S. GDP (Gross Domestic Product). Tasks are constructed from the representative work of industry professionals with an average of 14 years of experience. We find that frontier model performance on GDPval is improving roughly linearly over time, and that the current best frontier models are approaching industry experts in deliverable quality. We analy","authors_text":"Alexandra Barr, Amelia Glaese, David Li, Elizabeth Proehl, Gildas Chabot, Grace Kim, Jerry Tworek, Laurance Fauconnet, Marwan Aljubeh, Michael Sharman, Michele Wang, Natalie S. Kim, Olivia Watkins, Patrick Chao, Phoebe Thacker, Rachel Dias, Samuel Miserendino, Sim\\'on Posada Fishman, Tejal Patwardhan","cross_cats":["cs.AI","cs.CY"],"headline":"Frontier AI models approach industry experts in quality on real-world economically valuable tasks.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-05T21:36:43Z","title":"GDPval: Evaluating AI Model Performance on Real-World Economically Valuable Tasks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.04374","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-16T11:08:12.153746Z","id":"774665a5-7f98-469d-87bc-f852f2954c87","model_set":{"reader":"grok-4.3"},"one_line_summary":"GDPval benchmark finds frontier AI models approaching industry experts on economically valuable tasks from high-GDP sectors, with linear performance gains over time.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Frontier AI models approach industry experts in quality on real-world economically valuable tasks.","strongest_claim":"frontier model performance on GDPval is improving roughly linearly over time, and that the current best frontier models are approaching industry experts in deliverable quality","weakest_assumption":"that the selected tasks and expert ratings accurately represent the full range of economically valuable work and that automated grading reliably matches human expert judgment on deliverable quality"}},"verdict_id":"774665a5-7f98-469d-87bc-f852f2954c87"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:32defff5fda8cb4f953f1644cc952ecdef0a2c74168ec099297cb3a3f0b447a8","target":"record","created_at":"2026-05-17T23:38:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3e907bdaf254be3ce564273e978e1bc5f5a1f12490ec69fc10eedd5df91fda4c","cross_cats_sorted":["cs.AI","cs.CY"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-05T21:36:43Z","title_canon_sha256":"98ad289b766a236ffb8e8459c62ec63bee942b19ec85473290763a0fb9ed4d41"},"schema_version":"1.0","source":{"id":"2510.04374","kind":"arxiv","version":1}},"canonical_sha256":"17825a779d18ff1b79ec66ab34dfe1d08698c6913c9055352154a735876fb74c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"17825a779d18ff1b79ec66ab34dfe1d08698c6913c9055352154a735876fb74c","first_computed_at":"2026-05-17T23:38:48.066776Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:48.066776Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VVtcqc3AHiINtBCNIBbTfRE+Iutnyl/lnATZXqqKKpku2UVVcqmz9m6ZYiP3f4j3BYlUsDj0nHO0BE2KHtpgCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:48.067227Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.04374","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:32defff5fda8cb4f953f1644cc952ecdef0a2c74168ec099297cb3a3f0b447a8","sha256:23a2ffb68438c481dd30f5c99717f486e284da7d62fb8e98d906f4c3f23923e1"],"state_sha256":"d666853d66d291aa3ab3bfced7e7a0203748981f431a34aed3a83a5b94ff015a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NXoxf2V1qYAucrjH8+F6LJza017klKBw8N+pejkpg+Xt3jm5OAKn5Z30EYdqz6LOgYDbbCamtKYcIppy8VdfBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T21:30:49.080100Z","bundle_sha256":"7a4dd1b974c815da63a588cf05b9da77547eb73b831b9c99543a520c856f11d4"}}