{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:DLG3BE5ZFE2KAILVBLEUJKM3X3","short_pith_number":"pith:DLG3BE5Z","canonical_record":{"source":{"id":"2606.30445","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:17:42Z","cross_cats_sorted":[],"title_canon_sha256":"97d249a12f003dd22e65b5ec338975f7d47ae08e202417cfe6efa4a362c5d5f7","abstract_canon_sha256":"45ad23beb68da69879dc9da3b9a6f65fd1aa85f61360417a239774fef730c739"},"schema_version":"1.0"},"canonical_sha256":"1acdb093b92934a021750ac944a99bbed91e16bceaa4807de06cf38f790b28ca","source":{"kind":"arxiv","id":"2606.30445","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.30445","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"arxiv_version","alias_value":"2606.30445v1","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.30445","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"pith_short_12","alias_value":"DLG3BE5ZFE2K","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"pith_short_16","alias_value":"DLG3BE5ZFE2KAILV","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"pith_short_8","alias_value":"DLG3BE5Z","created_at":"2026-06-30T02:18:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:DLG3BE5ZFE2KAILVBLEUJKM3X3","target":"record","payload":{"canonical_record":{"source":{"id":"2606.30445","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:17:42Z","cross_cats_sorted":[],"title_canon_sha256":"97d249a12f003dd22e65b5ec338975f7d47ae08e202417cfe6efa4a362c5d5f7","abstract_canon_sha256":"45ad23beb68da69879dc9da3b9a6f65fd1aa85f61360417a239774fef730c739"},"schema_version":"1.0"},"canonical_sha256":"1acdb093b92934a021750ac944a99bbed91e16bceaa4807de06cf38f790b28ca","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T02:18:15.462176Z","signature_b64":"WPqqyE7KU/UdJ7lt+qRf3Ad8U02IvOXHfao3G0uQmMdgb2HLXTWBAVRd8Xn9/RedfAieDnP7sFm6y5zmNonCCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1acdb093b92934a021750ac944a99bbed91e16bceaa4807de06cf38f790b28ca","last_reissued_at":"2026-06-30T02:18:15.461603Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T02:18:15.461603Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.30445","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T02:18:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ou5iBuLe7hG2sOFO2EYajy2TOzrqc/iqgXXEm+0pOcrekGU9p58M0eDcryNI49P9hxluLb3Z3c4Y5iYmdt+RCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T14:41:47.105722Z"},"content_sha256":"20140a2a9c00b1547de127fe1c8d71466c4ddf973d2e0e6b1ce542380b74bc49","schema_version":"1.0","event_id":"sha256:20140a2a9c00b1547de127fe1c8d71466c4ddf973d2e0e6b1ce542380b74bc49"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:DLG3BE5ZFE2KAILVBLEUJKM3X3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"When Does Online Imitation Learning Help in LLM Post-Training? The Role of (Non-)Realizability Beyond Horizon","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Andrej Risteski, Bingbin Liu, Huaqing Zhang, Jingchu Gai, Juno Kim","submitted_at":"2026-06-29T15:17:42Z","abstract_excerpt":"Online imitation learning (IL), particularly on-policy distillation, has emerged as a strong LLM post-training approach, often outperforming offline supervised fine-tuning (SFT). Yet a principled understanding of when and why online interaction helps remains unclear. In this work, we challenge the view that error accumulation is the main source of online IL's advantage, and instead show that the benefits of online interaction depend critically on whether the setting is realizable, i.e., whether the student policy class can represent the expert policy. Under realizability, we empirically find t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.30445","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.30445/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T02:18:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zY7Axx0qxnTGWAJjihNacnCDe9I74Rhe80nGiCrQmVRI9RSMuHQ29uIvXg7F6QB399IZgWCPNjH6qrboK9IGBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T14:41:47.106123Z"},"content_sha256":"b25d4322a0823945638aafd549af7180b2210c55ca943247be8caa2cda30f086","schema_version":"1.0","event_id":"sha256:b25d4322a0823945638aafd549af7180b2210c55ca943247be8caa2cda30f086"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DLG3BE5ZFE2KAILVBLEUJKM3X3/bundle.json","state_url":"https://pith.science/pith/DLG3BE5ZFE2KAILVBLEUJKM3X3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DLG3BE5ZFE2KAILVBLEUJKM3X3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T14:41:47Z","links":{"resolver":"https://pith.science/pith/DLG3BE5ZFE2KAILVBLEUJKM3X3","bundle":"https://pith.science/pith/DLG3BE5ZFE2KAILVBLEUJKM3X3/bundle.json","state":"https://pith.science/pith/DLG3BE5ZFE2KAILVBLEUJKM3X3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DLG3BE5ZFE2KAILVBLEUJKM3X3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DLG3BE5ZFE2KAILVBLEUJKM3X3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"45ad23beb68da69879dc9da3b9a6f65fd1aa85f61360417a239774fef730c739","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:17:42Z","title_canon_sha256":"97d249a12f003dd22e65b5ec338975f7d47ae08e202417cfe6efa4a362c5d5f7"},"schema_version":"1.0","source":{"id":"2606.30445","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.30445","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"arxiv_version","alias_value":"2606.30445v1","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.30445","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"pith_short_12","alias_value":"DLG3BE5ZFE2K","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"pith_short_16","alias_value":"DLG3BE5ZFE2KAILV","created_at":"2026-06-30T02:18:15Z"},{"alias_kind":"pith_short_8","alias_value":"DLG3BE5Z","created_at":"2026-06-30T02:18:15Z"}],"graph_snapshots":[{"event_id":"sha256:b25d4322a0823945638aafd549af7180b2210c55ca943247be8caa2cda30f086","target":"graph","created_at":"2026-06-30T02:18:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.30445/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Online imitation learning (IL), particularly on-policy distillation, has emerged as a strong LLM post-training approach, often outperforming offline supervised fine-tuning (SFT). Yet a principled understanding of when and why online interaction helps remains unclear. In this work, we challenge the view that error accumulation is the main source of online IL's advantage, and instead show that the benefits of online interaction depend critically on whether the setting is realizable, i.e., whether the student policy class can represent the expert policy. Under realizability, we empirically find t","authors_text":"Andrej Risteski, Bingbin Liu, Huaqing Zhang, Jingchu Gai, Juno Kim","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:17:42Z","title":"When Does Online Imitation Learning Help in LLM Post-Training? The Role of (Non-)Realizability Beyond Horizon"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.30445","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:20140a2a9c00b1547de127fe1c8d71466c4ddf973d2e0e6b1ce542380b74bc49","target":"record","created_at":"2026-06-30T02:18:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"45ad23beb68da69879dc9da3b9a6f65fd1aa85f61360417a239774fef730c739","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:17:42Z","title_canon_sha256":"97d249a12f003dd22e65b5ec338975f7d47ae08e202417cfe6efa4a362c5d5f7"},"schema_version":"1.0","source":{"id":"2606.30445","kind":"arxiv","version":1}},"canonical_sha256":"1acdb093b92934a021750ac944a99bbed91e16bceaa4807de06cf38f790b28ca","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1acdb093b92934a021750ac944a99bbed91e16bceaa4807de06cf38f790b28ca","first_computed_at":"2026-06-30T02:18:15.461603Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-30T02:18:15.461603Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"WPqqyE7KU/UdJ7lt+qRf3Ad8U02IvOXHfao3G0uQmMdgb2HLXTWBAVRd8Xn9/RedfAieDnP7sFm6y5zmNonCCw==","signature_status":"signed_v1","signed_at":"2026-06-30T02:18:15.462176Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.30445","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:20140a2a9c00b1547de127fe1c8d71466c4ddf973d2e0e6b1ce542380b74bc49","sha256:b25d4322a0823945638aafd549af7180b2210c55ca943247be8caa2cda30f086"],"state_sha256":"bf1600a68e11e1746ba7801660b7f1f84a943c3330523b9a36e66ec833b9e5ac"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m9OHb6BHb5j2uKqtV2BS6FCkpVlMxxIGYVZgEJkk0VVwG3yp/U5IxxV7nSLmukiOmbzVWI2khT55YcipJQf4Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T14:41:47.108324Z","bundle_sha256":"4e74c0b78d8d7ca9247718ce8a24d26d79e80ec43c31335032cd66c2e146efd2"}}