{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:GEDPD23OMDEPFDLF5LVKOM75XW","short_pith_number":"pith:GEDPD23O","canonical_record":{"source":{"id":"2605.31490","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T16:12:54Z","cross_cats_sorted":[],"title_canon_sha256":"df17526429ffbeffdba8165ae421c17af2bde4eaaa364bcfdf212283ed3572c5","abstract_canon_sha256":"08223f67e8a7c930b065a0f7a989b30e456a42eb8b93020ca5b2d9bde945bbc5"},"schema_version":"1.0"},"canonical_sha256":"3106f1eb6e60c8f28d65eaeaa733fdbd9c79f641da395417a931a0cec0fba059","source":{"kind":"arxiv","id":"2605.31490","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.31490","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"arxiv_version","alias_value":"2605.31490v1","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.31490","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"pith_short_12","alias_value":"GEDPD23OMDEP","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"pith_short_16","alias_value":"GEDPD23OMDEPFDLF","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"pith_short_8","alias_value":"GEDPD23O","created_at":"2026-06-01T02:04:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:GEDPD23OMDEPFDLF5LVKOM75XW","target":"record","payload":{"canonical_record":{"source":{"id":"2605.31490","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T16:12:54Z","cross_cats_sorted":[],"title_canon_sha256":"df17526429ffbeffdba8165ae421c17af2bde4eaaa364bcfdf212283ed3572c5","abstract_canon_sha256":"08223f67e8a7c930b065a0f7a989b30e456a42eb8b93020ca5b2d9bde945bbc5"},"schema_version":"1.0"},"canonical_sha256":"3106f1eb6e60c8f28d65eaeaa733fdbd9c79f641da395417a931a0cec0fba059","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T02:04:08.918432Z","signature_b64":"VDyMWMj7uQgFPVvYQUC1KuKK5b96BIOwSBZL2C1zt1dPiMSMQpBSZJzMouSTTZnhYDymmldSriS+fSZwYG40BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3106f1eb6e60c8f28d65eaeaa733fdbd9c79f641da395417a931a0cec0fba059","last_reissued_at":"2026-06-01T02:04:08.917290Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T02:04:08.917290Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.31490","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T02:04:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0tTRWeO+5xLQApOWi5Yts2EBhc4Kx1WJjLv51cstlqVJ7gvDPfoJXCFFMa/e+j2hKrPs410qGTPleiGxDdeQDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T21:26:53.416165Z"},"content_sha256":"1247473584f4f7e58b631ac1440c1eabb3bc925c57fc9f5f6e8b263947e4a933","schema_version":"1.0","event_id":"sha256:1247473584f4f7e58b631ac1440c1eabb3bc925c57fc9f5f6e8b263947e4a933"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:GEDPD23OMDEPFDLF5LVKOM75XW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Are Full Rollouts Necessary for On-Policy Distillation?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dongbin Zhao, Guojun Yin, Jiajun Chai, Qichao Zhang, Songjun Tu, Wei Lin, Xiaohan Wang, Yaocheng Zhang, Yuanheng Zhu, Yuqian Fu","submitted_at":"2026-05-29T16:12:54Z","abstract_excerpt":"On-policy distillation (OPD) provides dense teacher feedback along rollouts generated by the student and has emerged as a promising post-training paradigm for long-horizon reasoning. However, standard OPD typically generates full rollouts during training, which is computationally expensive and may expose the student to unreliable teacher feedback at late rollout positions, especially during early training. We identify the rollout horizon as a key bottleneck in OPD that substantially impacts training efficiency. Unlike Reinforcement Learning with Verifiable Rewards (RLVR), OPD does not require "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.31490","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.31490/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T02:04:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"s9A69BmuI7tqdQDqA7CT/fWtQIHa/QGpFQVrA3i6owOOc7mAnGNfT78ZGSjNDWy9ZMJG41yCbtxFF30n4kOJAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T21:26:53.416551Z"},"content_sha256":"b5446cb0d1e84559e2dc1bef3295bcbbeb2dfc31df5b9b22c7617bbdde84bd5e","schema_version":"1.0","event_id":"sha256:b5446cb0d1e84559e2dc1bef3295bcbbeb2dfc31df5b9b22c7617bbdde84bd5e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GEDPD23OMDEPFDLF5LVKOM75XW/bundle.json","state_url":"https://pith.science/pith/GEDPD23OMDEPFDLF5LVKOM75XW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GEDPD23OMDEPFDLF5LVKOM75XW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T21:26:53Z","links":{"resolver":"https://pith.science/pith/GEDPD23OMDEPFDLF5LVKOM75XW","bundle":"https://pith.science/pith/GEDPD23OMDEPFDLF5LVKOM75XW/bundle.json","state":"https://pith.science/pith/GEDPD23OMDEPFDLF5LVKOM75XW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GEDPD23OMDEPFDLF5LVKOM75XW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:GEDPD23OMDEPFDLF5LVKOM75XW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"08223f67e8a7c930b065a0f7a989b30e456a42eb8b93020ca5b2d9bde945bbc5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T16:12:54Z","title_canon_sha256":"df17526429ffbeffdba8165ae421c17af2bde4eaaa364bcfdf212283ed3572c5"},"schema_version":"1.0","source":{"id":"2605.31490","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.31490","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"arxiv_version","alias_value":"2605.31490v1","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.31490","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"pith_short_12","alias_value":"GEDPD23OMDEP","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"pith_short_16","alias_value":"GEDPD23OMDEPFDLF","created_at":"2026-06-01T02:04:08Z"},{"alias_kind":"pith_short_8","alias_value":"GEDPD23O","created_at":"2026-06-01T02:04:08Z"}],"graph_snapshots":[{"event_id":"sha256:b5446cb0d1e84559e2dc1bef3295bcbbeb2dfc31df5b9b22c7617bbdde84bd5e","target":"graph","created_at":"2026-06-01T02:04:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.31490/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"On-policy distillation (OPD) provides dense teacher feedback along rollouts generated by the student and has emerged as a promising post-training paradigm for long-horizon reasoning. However, standard OPD typically generates full rollouts during training, which is computationally expensive and may expose the student to unreliable teacher feedback at late rollout positions, especially during early training. We identify the rollout horizon as a key bottleneck in OPD that substantially impacts training efficiency. Unlike Reinforcement Learning with Verifiable Rewards (RLVR), OPD does not require ","authors_text":"Dongbin Zhao, Guojun Yin, Jiajun Chai, Qichao Zhang, Songjun Tu, Wei Lin, Xiaohan Wang, Yaocheng Zhang, Yuanheng Zhu, Yuqian Fu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T16:12:54Z","title":"Are Full Rollouts Necessary for On-Policy Distillation?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.31490","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1247473584f4f7e58b631ac1440c1eabb3bc925c57fc9f5f6e8b263947e4a933","target":"record","created_at":"2026-06-01T02:04:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"08223f67e8a7c930b065a0f7a989b30e456a42eb8b93020ca5b2d9bde945bbc5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T16:12:54Z","title_canon_sha256":"df17526429ffbeffdba8165ae421c17af2bde4eaaa364bcfdf212283ed3572c5"},"schema_version":"1.0","source":{"id":"2605.31490","kind":"arxiv","version":1}},"canonical_sha256":"3106f1eb6e60c8f28d65eaeaa733fdbd9c79f641da395417a931a0cec0fba059","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3106f1eb6e60c8f28d65eaeaa733fdbd9c79f641da395417a931a0cec0fba059","first_computed_at":"2026-06-01T02:04:08.917290Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T02:04:08.917290Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VDyMWMj7uQgFPVvYQUC1KuKK5b96BIOwSBZL2C1zt1dPiMSMQpBSZJzMouSTTZnhYDymmldSriS+fSZwYG40BA==","signature_status":"signed_v1","signed_at":"2026-06-01T02:04:08.918432Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.31490","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1247473584f4f7e58b631ac1440c1eabb3bc925c57fc9f5f6e8b263947e4a933","sha256:b5446cb0d1e84559e2dc1bef3295bcbbeb2dfc31df5b9b22c7617bbdde84bd5e"],"state_sha256":"7040a9e9b52c301b28ae8618ca741b22d59e7b8b2e64d658a54aec73b3d71fa3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4vzJ/gvnNLTvdpP9OB2Layl3m9trSBedF+xTr1OhV3eDChYYh4cNTIXVXJcIqFJtS2IB76yTtikfsB10U4gSBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T21:26:53.418517Z","bundle_sha256":"f15e66129b54d193c345edb8896e462187b373c4139bb728ee8df10344f3940a"}}