{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:TUMS7HT3H47MWFF5JE2365U6H7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a08d85c4518229e8d8935c75cdb8a5b82b7358c06bde6fe3a1d13547d0c4c24a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-18T02:02:46Z","title_canon_sha256":"e9e91e245b5881ce729a859ba11d5ae2a68ff34660bbf7d66becc24a1f115181"},"schema_version":"1.0","source":{"id":"2606.19704","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.19704","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"arxiv_version","alias_value":"2606.19704v1","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19704","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_12","alias_value":"TUMS7HT3H47M","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_16","alias_value":"TUMS7HT3H47MWFF5","created_at":"2026-06-19T16:12:32Z"},{"alias_kind":"pith_short_8","alias_value":"TUMS7HT3","created_at":"2026-06-19T16:12:32Z"}],"graph_snapshots":[{"event_id":"sha256:cabbe800ee9a5484bfec8e89b2137bd1e760c30c4c114cb5e33d132aed5f4bc1","target":"graph","created_at":"2026-06-19T16:12:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.19704/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Agent benchmarks are growing fast, but no single benchmark touches more than four or five of the dimensions that deployment exposes. This paper aggregates the largest coordinated deep-dive of one MCP-based industrial-agent benchmark to date: fourteen parallel implementation studies covering new asset classes (including a multi-modal visual extension), alternative orchestrations, retrieval strategies, reasoning modes, infrastructure optimizations, and evaluation-methodology probes. Consolidating those studies with seven prior agent benchmarks, we argue that aggregate-score leaderboards systemat","authors_text":"Aaron Fan, Akshat Bhandari, Alimurtaza Mustafa Merchant, Alisha Vinod, Amaan Sheikh, Aman Upganlawar, Ananya Kapoor, Andrew Li, Ann Li, Aryaman Agrawal, Ayal Yakobe, Byeolah Kwon, Caroline Cahill, Charles Xu, Chengrui Li, Chun-Yi Tsai, Darief Maes, Dev Bahl, Dhaval C. Patel, Kaoutar El Maghraoui, Kirthana Natarajan, Krish Veera, Madhav Rajkondawar, Mana Abbaszadeh, Mao Le Jonathan Ang, Rohith Kanathur, Rui Li, Rujing Li, Rushin Bhatt, Sagar Chethan Kumar, Sajal Kumar Goyla, Sam Colman, Sanjaii Vijayakumar, Sanskruti Vijay Shejwal, Shambhawi Baswaraj Bhure, Shen Li, Shrey Arora, Shriya Aishani Rachakonda, Shuxin Lin, Siddharth Chethan Gowda, Tanisha Rathod, Tanmay Agarwal, Thai Quoc On, Thomas Ajai, Tianjun Feng, Tianyang Xu, Tomas Pasiecznik, Trisha Maturi, Vera Mazeeva, Vivek G. Iyer, Wei Alexander Xin, Winston Li, Yang-Jung Chen, Yassine Jebbouri, Yeshitha Bhuvanesh, Yihan Sun, Yitong Bai, Yubin Sally Go, Yunfeng Chen, Yusheng Li, Yuval Shemla","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-18T02:02:46Z","title":"Beyond Static Leaderboards: Predictive Validity for the Evaluation of LLM Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19704","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:21ee63f02827db6a6db6d3fcb1a24edf911fb00e6d2465607215de97e127693a","target":"record","created_at":"2026-06-19T16:12:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a08d85c4518229e8d8935c75cdb8a5b82b7358c06bde6fe3a1d13547d0c4c24a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-18T02:02:46Z","title_canon_sha256":"e9e91e245b5881ce729a859ba11d5ae2a68ff34660bbf7d66becc24a1f115181"},"schema_version":"1.0","source":{"id":"2606.19704","kind":"arxiv","version":1}},"canonical_sha256":"9d192f9e7b3f3ecb14bd4935bf769e3febde159d4a636dd11075c5e88aff76f7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9d192f9e7b3f3ecb14bd4935bf769e3febde159d4a636dd11075c5e88aff76f7","first_computed_at":"2026-06-19T16:12:32.860063Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:12:32.860063Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OOmJf54PULky0OUHmt8zGDZP4zIl637Ywp4Ijsy2fCz8tSZnrItmJlIbYMFgOXJxpALMe0t+PUNT+Xs7mBseBg==","signature_status":"signed_v1","signed_at":"2026-06-19T16:12:32.860534Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.19704","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:21ee63f02827db6a6db6d3fcb1a24edf911fb00e6d2465607215de97e127693a","sha256:cabbe800ee9a5484bfec8e89b2137bd1e760c30c4c114cb5e33d132aed5f4bc1"],"state_sha256":"bb663b1a8b35224cb6fa2a0d094c57ce7debdaa3cd4d57ebe360dd867d0d472b"}