{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:D2PL65G5AWDFH674VAU7QN42R3","short_pith_number":"pith:D2PL65G5","schema_version":"1.0","canonical_sha256":"1e9ebf74dd058653fbfca829f8379a8eea6428ecac153478fd3867c5e53e4a7c","source":{"kind":"arxiv","id":"2606.12370","version":1},"attestation_state":"computed","paper":{"title":"Breaking Entropy Bounds: Accelerating RL Training via MTP with Rejection Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"An Yang, Bowen Yu, Bo Zheng, Dayiheng Liu, Fan Zhou, Fei Huang, Huiqiang Jiang, Jianwei Zhang, Jianxin Yang, Jingren Zhou, Junyang Lin, Rui Men, Yang Xu, Yi Zhang, Yizhong Cao, Yucheng Li, Yuhao Shen","submitted_at":"2026-06-10T17:36:45Z","abstract_excerpt":"Reinforcement learning (RL) has become a key component in modern large language models, yet the rollout stage remains the key bottleneck in RL training pipelines. Although Multi-Token Prediction (MTP) offers a natural solution to accelerate rollouts through speculative decoding, many studies have observed that MTP acceptance rates degrade significantly during RL training, leading to limited speedup performance. To address this bottleneck, we present Bebop, a systematic study of MTP in LLM post-training, and offer practical recipes to integrate MTP into large-scale RL pipelines. First, we revea"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.12370","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-10T17:36:45Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"2d02ece192613e086e9987d713edeba5728743dea981226ee058c2df17799f65","abstract_canon_sha256":"7d1522ef0adaada18018d52d016ad94167e740dca72d31c0c23b67256612bfcb"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T02:09:48.862323Z","signature_b64":"+GOth1b9PY41pTb55yktM+EcYLvyUmMU9RIvoiTx1Hys5tWIMKysC71a8OhXGR4g99sp8LiAaGu2oalo8ry6DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1e9ebf74dd058653fbfca829f8379a8eea6428ecac153478fd3867c5e53e4a7c","last_reissued_at":"2026-06-11T02:09:48.861823Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T02:09:48.861823Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Breaking Entropy Bounds: Accelerating RL Training via MTP with Rejection Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"An Yang, Bowen Yu, Bo Zheng, Dayiheng Liu, Fan Zhou, Fei Huang, Huiqiang Jiang, Jianwei Zhang, Jianxin Yang, Jingren Zhou, Junyang Lin, Rui Men, Yang Xu, Yi Zhang, Yizhong Cao, Yucheng Li, Yuhao Shen","submitted_at":"2026-06-10T17:36:45Z","abstract_excerpt":"Reinforcement learning (RL) has become a key component in modern large language models, yet the rollout stage remains the key bottleneck in RL training pipelines. Although Multi-Token Prediction (MTP) offers a natural solution to accelerate rollouts through speculative decoding, many studies have observed that MTP acceptance rates degrade significantly during RL training, leading to limited speedup performance. To address this bottleneck, we present Bebop, a systematic study of MTP in LLM post-training, and offer practical recipes to integrate MTP into large-scale RL pipelines. First, we revea"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.12370","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.12370/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.12370","created_at":"2026-06-11T02:09:48.861884+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.12370v1","created_at":"2026-06-11T02:09:48.861884+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.12370","created_at":"2026-06-11T02:09:48.861884+00:00"},{"alias_kind":"pith_short_12","alias_value":"D2PL65G5AWDF","created_at":"2026-06-11T02:09:48.861884+00:00"},{"alias_kind":"pith_short_16","alias_value":"D2PL65G5AWDFH674","created_at":"2026-06-11T02:09:48.861884+00:00"},{"alias_kind":"pith_short_8","alias_value":"D2PL65G5","created_at":"2026-06-11T02:09:48.861884+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3","json":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3.json","graph_json":"https://pith.science/api/pith-number/D2PL65G5AWDFH674VAU7QN42R3/graph.json","events_json":"https://pith.science/api/pith-number/D2PL65G5AWDFH674VAU7QN42R3/events.json","paper":"https://pith.science/paper/D2PL65G5"},"agent_actions":{"view_html":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3","download_json":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3.json","view_paper":"https://pith.science/paper/D2PL65G5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.12370&json=true","fetch_graph":"https://pith.science/api/pith-number/D2PL65G5AWDFH674VAU7QN42R3/graph.json","fetch_events":"https://pith.science/api/pith-number/D2PL65G5AWDFH674VAU7QN42R3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3/action/storage_attestation","attest_author":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3/action/author_attestation","sign_citation":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3/action/citation_signature","submit_replication":"https://pith.science/pith/D2PL65G5AWDFH674VAU7QN42R3/action/replication_record"}},"created_at":"2026-06-11T02:09:48.861884+00:00","updated_at":"2026-06-11T02:09:48.861884+00:00"}