{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:STY4O33B6CDDNFQLBLP3E4ADDB","short_pith_number":"pith:STY4O33B","schema_version":"1.0","canonical_sha256":"94f1c76f61f08636960b0adfb2700318692cfb976f8f5452cf984403ecc5cbfb","source":{"kind":"arxiv","id":"2606.17515","version":1},"attestation_state":"computed","paper":{"title":"Anytime-valid Optimal Policy Identification","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"stat.ME","authors_text":"Daniel Molitor","submitted_at":"2026-06-16T04:49:19Z","abstract_excerpt":"We develop an anytime-valid framework for optimal policy identification from logged contextual bandit data. In many applied settings, the analyst wants to select the optimal policy from a candidate policy class $\\Pi$, but data are generated by an externally determined logging policy that they do not control. The analyst may also wish to monitor evidence continuously, stopping once the optimal policy is clear rather than committing to a fixed sample size in advance. This paper addresses these challenges by constructing a time-indexed set $S_t$ that retains the true optimal policy set uniformly "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.17515","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ME","submitted_at":"2026-06-16T04:49:19Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ebfe6c92eca2e2fb0401747a85f6f945864b662db9f72473b6e2852d57679a81","abstract_canon_sha256":"1aa62ffe621a4994f574456fb15a1993006d9b134d1f4ffab98dffd2c88ebbaa"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:14.892515Z","signature_b64":"yWvJ2sT2flK/jQwqPyMU+ecPCcIRjANkaN4vgiqNlaXPmNsrfBMUx4s3vAXDhXnWh+tsJvoh61Q+Dv1M8gO3BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"94f1c76f61f08636960b0adfb2700318692cfb976f8f5452cf984403ecc5cbfb","last_reissued_at":"2026-06-19T16:10:14.892117Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:14.892117Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Anytime-valid Optimal Policy Identification","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"stat.ME","authors_text":"Daniel Molitor","submitted_at":"2026-06-16T04:49:19Z","abstract_excerpt":"We develop an anytime-valid framework for optimal policy identification from logged contextual bandit data. In many applied settings, the analyst wants to select the optimal policy from a candidate policy class $\\Pi$, but data are generated by an externally determined logging policy that they do not control. The analyst may also wish to monitor evidence continuously, stopping once the optimal policy is clear rather than committing to a fixed sample size in advance. This paper addresses these challenges by constructing a time-indexed set $S_t$ that retains the true optimal policy set uniformly "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17515","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.17515/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.17515","created_at":"2026-06-19T16:10:14.892188+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.17515v1","created_at":"2026-06-19T16:10:14.892188+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17515","created_at":"2026-06-19T16:10:14.892188+00:00"},{"alias_kind":"pith_short_12","alias_value":"STY4O33B6CDD","created_at":"2026-06-19T16:10:14.892188+00:00"},{"alias_kind":"pith_short_16","alias_value":"STY4O33B6CDDNFQL","created_at":"2026-06-19T16:10:14.892188+00:00"},{"alias_kind":"pith_short_8","alias_value":"STY4O33B","created_at":"2026-06-19T16:10:14.892188+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB","json":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB.json","graph_json":"https://pith.science/api/pith-number/STY4O33B6CDDNFQLBLP3E4ADDB/graph.json","events_json":"https://pith.science/api/pith-number/STY4O33B6CDDNFQLBLP3E4ADDB/events.json","paper":"https://pith.science/paper/STY4O33B"},"agent_actions":{"view_html":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB","download_json":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB.json","view_paper":"https://pith.science/paper/STY4O33B","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.17515&json=true","fetch_graph":"https://pith.science/api/pith-number/STY4O33B6CDDNFQLBLP3E4ADDB/graph.json","fetch_events":"https://pith.science/api/pith-number/STY4O33B6CDDNFQLBLP3E4ADDB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB/action/storage_attestation","attest_author":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB/action/author_attestation","sign_citation":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB/action/citation_signature","submit_replication":"https://pith.science/pith/STY4O33B6CDDNFQLBLP3E4ADDB/action/replication_record"}},"created_at":"2026-06-19T16:10:14.892188+00:00","updated_at":"2026-06-19T16:10:14.892188+00:00"}