{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:IRRAAFXSBTGRA4ITJNK4HQORCR","short_pith_number":"pith:IRRAAFXS","canonical_record":{"source":{"id":"2602.22480","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-25T23:40:22Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"54cd9655369e4f18057967c0ba3e7d202e9dfa6fbf92f8a18a7c72b59c453ecb","abstract_canon_sha256":"9ef5ea8fb8de7c103a816617ba15f66eba473bf1596ea57e2f25790c613716a3"},"schema_version":"1.0"},"canonical_sha256":"44620016f20ccd1071134b55c3c1d11464be0a210a4b25f79806e07d6706d4d4","source":{"kind":"arxiv","id":"2602.22480","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.22480","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"arxiv_version","alias_value":"2602.22480v4","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.22480","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"pith_short_12","alias_value":"IRRAAFXSBTGR","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"pith_short_16","alias_value":"IRRAAFXSBTGRA4IT","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"pith_short_8","alias_value":"IRRAAFXS","created_at":"2026-06-03T02:05:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:IRRAAFXSBTGRA4ITJNK4HQORCR","target":"record","payload":{"canonical_record":{"source":{"id":"2602.22480","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-25T23:40:22Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"54cd9655369e4f18057967c0ba3e7d202e9dfa6fbf92f8a18a7c72b59c453ecb","abstract_canon_sha256":"9ef5ea8fb8de7c103a816617ba15f66eba473bf1596ea57e2f25790c613716a3"},"schema_version":"1.0"},"canonical_sha256":"44620016f20ccd1071134b55c3c1d11464be0a210a4b25f79806e07d6706d4d4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T02:05:46.192096Z","signature_b64":"a+zXwixtg5w2kWNTPDKr/vFnBW0YBIozNVD8rJNmWjs9RRAdBtscua/Hcdkjcgm/xYX3FnmfIEyn3MK4ba5SBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"44620016f20ccd1071134b55c3c1d11464be0a210a4b25f79806e07d6706d4d4","last_reissued_at":"2026-06-03T02:05:46.191614Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T02:05:46.191614Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.22480","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T02:05:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"eBo1FsC8ZrghJDb8KyG+fqH9KMwkQjobOnXEdYkfEd6udNW6aHQRnaFeFjRtl0IYpGDwzKG3JcRhGZwWs6b7BQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T22:09:29.591003Z"},"content_sha256":"80fa3cf6462bd432d07e8445122f936013badc25aa82f6c4f2ea45815ff11bf8","schema_version":"1.0","event_id":"sha256:80fa3cf6462bd432d07e8445122f936013badc25aa82f6c4f2ea45815ff11bf8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:IRRAAFXSBTGRA4ITJNK4HQORCR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VeRO: A Harness for Agents to Optimize Agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"VERO supplies a reproducible harness with versioned snapshots and structured traces to compare how agents optimize other agents.","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.AI","authors_text":"Apaar Shanker, Samuel Marc Denton, Varun Ursekar, Veronica Chatrath, Yuan Xue","submitted_at":"2026-02-25T23:40:22Z","abstract_excerpt":"An important emerging application of coding agents is agent harness optimization: the iterative improvement of a target agent by editing and evaluating its code. Despite its relevance, the community lacks a systematic understanding of coding agent performance on this task. Harness optimization differs from conventional software engineering: agent harnesses interleave deterministic code with stochastic LLM completions, requiring structured capture of both intermediate execution traces and downstream outcomes. To address these challenges, we introduce (1) VeRO (Versioning, Rewards, and Observati"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Using VERO, we conduct an empirical study comparing optimizer configurations across tasks and analyzing which modifications reliably improve target agent performance.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That structured capture of intermediate reasoning and downstream execution outcomes, together with budget-controlled evaluation, is both necessary and sufficient to produce reliable comparisons of agent optimizers.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"VeRO supplies a versioned harness, benchmark suite, and empirical comparison of optimizer configurations for coding agents that improve other agents.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"VERO supplies a reproducible harness with versioned snapshots and structured traces to compare how agents optimize other agents.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"7e86d8aea8f34bade16548f9604c8aafbe55f78d9cadbbd0743ba186d1f17d3d"},"source":{"id":"2602.22480","kind":"arxiv","version":4},"verdict":{"id":"15d0951f-29a5-4f77-bc88-9be90ae1092b","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T18:59:33.979445Z","strongest_claim":"Using VERO, we conduct an empirical study comparing optimizer configurations across tasks and analyzing which modifications reliably improve target agent performance.","one_line_summary":"VeRO supplies a versioned harness, benchmark suite, and empirical comparison of optimizer configurations for coding agents that improve other agents.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That structured capture of intermediate reasoning and downstream execution outcomes, together with budget-controlled evaluation, is both necessary and sufficient to produce reliable comparisons of agent optimizers.","pith_extraction_headline":"VERO supplies a reproducible harness with versioned snapshots and structured traces to compare how agents optimize other agents."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.22480/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"15d0951f-29a5-4f77-bc88-9be90ae1092b"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T02:05:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vrinYlsftKlSij5lqwfzgkA1l/AM2KrTqI4FSnSmw5mj34RzP5UM8okNJIQEM4NhOUk0SxlcH5lbpug9FNjBBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T22:09:29.591477Z"},"content_sha256":"29ae416cb5010caacebd6a4146d9cadb6026dcb749829ef7506eccf0bcc82ce6","schema_version":"1.0","event_id":"sha256:29ae416cb5010caacebd6a4146d9cadb6026dcb749829ef7506eccf0bcc82ce6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IRRAAFXSBTGRA4ITJNK4HQORCR/bundle.json","state_url":"https://pith.science/pith/IRRAAFXSBTGRA4ITJNK4HQORCR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IRRAAFXSBTGRA4ITJNK4HQORCR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T22:09:29Z","links":{"resolver":"https://pith.science/pith/IRRAAFXSBTGRA4ITJNK4HQORCR","bundle":"https://pith.science/pith/IRRAAFXSBTGRA4ITJNK4HQORCR/bundle.json","state":"https://pith.science/pith/IRRAAFXSBTGRA4ITJNK4HQORCR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IRRAAFXSBTGRA4ITJNK4HQORCR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:IRRAAFXSBTGRA4ITJNK4HQORCR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9ef5ea8fb8de7c103a816617ba15f66eba473bf1596ea57e2f25790c613716a3","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-25T23:40:22Z","title_canon_sha256":"54cd9655369e4f18057967c0ba3e7d202e9dfa6fbf92f8a18a7c72b59c453ecb"},"schema_version":"1.0","source":{"id":"2602.22480","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.22480","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"arxiv_version","alias_value":"2602.22480v4","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.22480","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"pith_short_12","alias_value":"IRRAAFXSBTGR","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"pith_short_16","alias_value":"IRRAAFXSBTGRA4IT","created_at":"2026-06-03T02:05:46Z"},{"alias_kind":"pith_short_8","alias_value":"IRRAAFXS","created_at":"2026-06-03T02:05:46Z"}],"graph_snapshots":[{"event_id":"sha256:29ae416cb5010caacebd6a4146d9cadb6026dcb749829ef7506eccf0bcc82ce6","target":"graph","created_at":"2026-06-03T02:05:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Using VERO, we conduct an empirical study comparing optimizer configurations across tasks and analyzing which modifications reliably improve target agent performance."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That structured capture of intermediate reasoning and downstream execution outcomes, together with budget-controlled evaluation, is both necessary and sufficient to produce reliable comparisons of agent optimizers."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"VeRO supplies a versioned harness, benchmark suite, and empirical comparison of optimizer configurations for coding agents that improve other agents."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"VERO supplies a reproducible harness with versioned snapshots and structured traces to compare how agents optimize other agents."}],"snapshot_sha256":"7e86d8aea8f34bade16548f9604c8aafbe55f78d9cadbbd0743ba186d1f17d3d"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.22480/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"An important emerging application of coding agents is agent harness optimization: the iterative improvement of a target agent by editing and evaluating its code. Despite its relevance, the community lacks a systematic understanding of coding agent performance on this task. Harness optimization differs from conventional software engineering: agent harnesses interleave deterministic code with stochastic LLM completions, requiring structured capture of both intermediate execution traces and downstream outcomes. To address these challenges, we introduce (1) VeRO (Versioning, Rewards, and Observati","authors_text":"Apaar Shanker, Samuel Marc Denton, Varun Ursekar, Veronica Chatrath, Yuan Xue","cross_cats":["cs.CL","cs.LG"],"headline":"VERO supplies a reproducible harness with versioned snapshots and structured traces to compare how agents optimize other agents.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-25T23:40:22Z","title":"VeRO: A Harness for Agents to Optimize Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.22480","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-15T18:59:33.979445Z","id":"15d0951f-29a5-4f77-bc88-9be90ae1092b","model_set":{"reader":"grok-4.3"},"one_line_summary":"VeRO supplies a versioned harness, benchmark suite, and empirical comparison of optimizer configurations for coding agents that improve other agents.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"VERO supplies a reproducible harness with versioned snapshots and structured traces to compare how agents optimize other agents.","strongest_claim":"Using VERO, we conduct an empirical study comparing optimizer configurations across tasks and analyzing which modifications reliably improve target agent performance.","weakest_assumption":"That structured capture of intermediate reasoning and downstream execution outcomes, together with budget-controlled evaluation, is both necessary and sufficient to produce reliable comparisons of agent optimizers."}},"verdict_id":"15d0951f-29a5-4f77-bc88-9be90ae1092b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:80fa3cf6462bd432d07e8445122f936013badc25aa82f6c4f2ea45815ff11bf8","target":"record","created_at":"2026-06-03T02:05:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9ef5ea8fb8de7c103a816617ba15f66eba473bf1596ea57e2f25790c613716a3","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-25T23:40:22Z","title_canon_sha256":"54cd9655369e4f18057967c0ba3e7d202e9dfa6fbf92f8a18a7c72b59c453ecb"},"schema_version":"1.0","source":{"id":"2602.22480","kind":"arxiv","version":4}},"canonical_sha256":"44620016f20ccd1071134b55c3c1d11464be0a210a4b25f79806e07d6706d4d4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"44620016f20ccd1071134b55c3c1d11464be0a210a4b25f79806e07d6706d4d4","first_computed_at":"2026-06-03T02:05:46.191614Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T02:05:46.191614Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"a+zXwixtg5w2kWNTPDKr/vFnBW0YBIozNVD8rJNmWjs9RRAdBtscua/Hcdkjcgm/xYX3FnmfIEyn3MK4ba5SBg==","signature_status":"signed_v1","signed_at":"2026-06-03T02:05:46.192096Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.22480","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:80fa3cf6462bd432d07e8445122f936013badc25aa82f6c4f2ea45815ff11bf8","sha256:29ae416cb5010caacebd6a4146d9cadb6026dcb749829ef7506eccf0bcc82ce6"],"state_sha256":"9c9f6b19a4f9b23f71fe1d622e20729436f9e7885ee3bb7cd8188b7329ebf0e2"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zzqYAW/HQG4dyUZ0+A7CPu0UTi5htVhBZHxZmOVJ1XAmhBlvtdVXx9ByGHIpknhn8Tr1MYH5h1Q6c2NwQH07Ag==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T22:09:29.593811Z","bundle_sha256":"9af45a975545a5e4b3bd8a403ef16f366e6a0dfef41211876920f4910112e573"}}