{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:UFU73PXZB6SAVUI7JEK6E24GBG","short_pith_number":"pith:UFU73PXZ","schema_version":"1.0","canonical_sha256":"a169fdbef90fa40ad11f4915e26b8609a6d7935682dd25b69e97ca2a42afb62c","source":{"kind":"arxiv","id":"2606.09471","version":1},"attestation_state":"computed","paper":{"title":"Escaping the KL Agreement Trap in On-Policy Distillation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Anhao Zhao, Haoran Xin, Hui Xiong, Jin Li, Xiaoyu Shen, Ying Sun","submitted_at":"2026-06-08T13:28:54Z","abstract_excerpt":"On-policy distillation (OPD) provides dense token-level supervision by asking a teacher to score student-generated rollouts. However, when the student drifts into an unrecoverable prefix, the teacher may locally agree with the degraded state, producing low reverse KL but little corrective training signal. We identify this persistent regime as a low-KL agreement trap. Further analyses show that tokens during and after such traps produce less useful supervision signals. We propose KAT (KL Agreement Trap Termination), an online OPD termination rule that detects persistent low-KL agreement with a "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.09471","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-08T13:28:54Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"487502008e3f0eeec0e98d47f0e17bdea2cbfe9ffec9fbbda290690a212b22bd","abstract_canon_sha256":"d893b4bc9b714ecb1c160ab9c6edae4b407888c334fa140ffb39d3f7467e1a05"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:08:50.378191Z","signature_b64":"ud4jqgDgnwY6tL+IqhlMn4ofg3B8u93Mt+qrnRZCrLkEQ5pjDKOpF/laQfZbJDtllRSRhiK/dEoRgvgXt26TBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a169fdbef90fa40ad11f4915e26b8609a6d7935682dd25b69e97ca2a42afb62c","last_reissued_at":"2026-06-09T02:08:50.377478Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:08:50.377478Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Escaping the KL Agreement Trap in On-Policy Distillation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Anhao Zhao, Haoran Xin, Hui Xiong, Jin Li, Xiaoyu Shen, Ying Sun","submitted_at":"2026-06-08T13:28:54Z","abstract_excerpt":"On-policy distillation (OPD) provides dense token-level supervision by asking a teacher to score student-generated rollouts. However, when the student drifts into an unrecoverable prefix, the teacher may locally agree with the degraded state, producing low reverse KL but little corrective training signal. We identify this persistent regime as a low-KL agreement trap. Further analyses show that tokens during and after such traps produce less useful supervision signals. We propose KAT (KL Agreement Trap Termination), an online OPD termination rule that detects persistent low-KL agreement with a "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.09471","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.09471/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.09471","created_at":"2026-06-09T02:08:50.377587+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.09471v1","created_at":"2026-06-09T02:08:50.377587+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.09471","created_at":"2026-06-09T02:08:50.377587+00:00"},{"alias_kind":"pith_short_12","alias_value":"UFU73PXZB6SA","created_at":"2026-06-09T02:08:50.377587+00:00"},{"alias_kind":"pith_short_16","alias_value":"UFU73PXZB6SAVUI7","created_at":"2026-06-09T02:08:50.377587+00:00"},{"alias_kind":"pith_short_8","alias_value":"UFU73PXZ","created_at":"2026-06-09T02:08:50.377587+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG","json":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG.json","graph_json":"https://pith.science/api/pith-number/UFU73PXZB6SAVUI7JEK6E24GBG/graph.json","events_json":"https://pith.science/api/pith-number/UFU73PXZB6SAVUI7JEK6E24GBG/events.json","paper":"https://pith.science/paper/UFU73PXZ"},"agent_actions":{"view_html":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG","download_json":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG.json","view_paper":"https://pith.science/paper/UFU73PXZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.09471&json=true","fetch_graph":"https://pith.science/api/pith-number/UFU73PXZB6SAVUI7JEK6E24GBG/graph.json","fetch_events":"https://pith.science/api/pith-number/UFU73PXZB6SAVUI7JEK6E24GBG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG/action/storage_attestation","attest_author":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG/action/author_attestation","sign_citation":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG/action/citation_signature","submit_replication":"https://pith.science/pith/UFU73PXZB6SAVUI7JEK6E24GBG/action/replication_record"}},"created_at":"2026-06-09T02:08:50.377587+00:00","updated_at":"2026-06-09T02:08:50.377587+00:00"}