{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:6KU6JIV3W3XXVSUHRFEUYWNEMI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b5eb94e4d990fa59b1fa39cc5ba3366daeabfa33024ee1ecf4994751e3b9fdc2","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T05:42:19Z","title_canon_sha256":"18f35f77bf9616b38507098d3ae3c83428c41d76e7e3fe927a2696b1055e8bfe"},"schema_version":"1.0","source":{"id":"2606.17541","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.17541","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"arxiv_version","alias_value":"2606.17541v1","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17541","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"pith_short_12","alias_value":"6KU6JIV3W3XX","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"pith_short_16","alias_value":"6KU6JIV3W3XXVSUH","created_at":"2026-06-19T16:10:15Z"},{"alias_kind":"pith_short_8","alias_value":"6KU6JIV3","created_at":"2026-06-19T16:10:15Z"}],"graph_snapshots":[{"event_id":"sha256:72b60c4003791851236ebd1c22a48e44832e24fbb0f74b7676d4292063766e6d","target":"graph","created_at":"2026-06-19T16:10:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.17541/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Offline evaluation of agentic systems often collapses trajectories to terminal success, discarding information about partial progress and inducing widespread ties, creating substantial statistical inefficiency by reducing effective sample size and weakening the ability to distinguish systems. We propose preference-based trajectory evaluation, which compares trajectories directly through temporal preferences over progress and time-to-return profiles. We find that, across diverse agentic and interactive benchmarks, standard success-based metrics produce tied comparisons on roughly 75% of instanc","authors_text":"Fernando Diaz","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T05:42:19Z","title":"Offline Preference-Based Trajectory Evaluation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17541","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d9be2cd8fcff19590130b269c2f80fe5ee6eed4c6df9f2c84bbd2503b37d09e5","target":"record","created_at":"2026-06-19T16:10:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b5eb94e4d990fa59b1fa39cc5ba3366daeabfa33024ee1ecf4994751e3b9fdc2","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T05:42:19Z","title_canon_sha256":"18f35f77bf9616b38507098d3ae3c83428c41d76e7e3fe927a2696b1055e8bfe"},"schema_version":"1.0","source":{"id":"2606.17541","kind":"arxiv","version":1}},"canonical_sha256":"f2a9e4a2bbb6ef7aca8789494c59a4622e9a80c9c16ec43382fcf57f54b4db36","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f2a9e4a2bbb6ef7aca8789494c59a4622e9a80c9c16ec43382fcf57f54b4db36","first_computed_at":"2026-06-19T16:10:15.555708Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:10:15.555708Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KNhAqzLCpVkGf5ga1sljypdJGdZi6MbIGpJE6dtQa6hM5uVP+yMHpXPBdLZ1n0tbRxweAxJ6ata9zyI3gWVSDg==","signature_status":"signed_v1","signed_at":"2026-06-19T16:10:15.556050Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.17541","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d9be2cd8fcff19590130b269c2f80fe5ee6eed4c6df9f2c84bbd2503b37d09e5","sha256:72b60c4003791851236ebd1c22a48e44832e24fbb0f74b7676d4292063766e6d"],"state_sha256":"69b886d714cb180618a28c864c7d93bcdd9084186acc768f94c5744099e11373"}