{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:QYW44JPAOHLYS3Z5UF6RBL66ZU","short_pith_number":"pith:QYW44JPA","schema_version":"1.0","canonical_sha256":"862dce25e071d7896f3da17d10afdecd17823655c85820ef6dfd0d376272c0ae","source":{"kind":"arxiv","id":"2606.09932","version":1},"attestation_state":"computed","paper":{"title":"When RL Fails after SFT: Rejuvenating Model Plasticity for Robust SFT-to-RL Handoff","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jiashun Liu, Ling Pan, Runze Liu, Xu Wan, Yuqian Fu","submitted_at":"2026-06-07T17:58:58Z","abstract_excerpt":"Supervised Fine-Tuning (SFT) followed by Reinforcement Learning (RL) has become a standard pipeline for Large Language Model (LLM) post-training. SFT is expected to provide a useful behavioral prior for RL to further enhance model capabilities. However, checkpoints with excessive SFT often show limited improvement during RL. We attribute this failure to the loss of model plasticity: the reduced ability of an SFT-initialized policy to be effectively reshaped by subsequent RL. To better understand this phenomenon, we conduct detailed analysis from multiple perspectives, including parameter chang"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.09932","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-07T17:58:58Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d513cffe39174c822af1141872cb6420dde423d5dbe0e545f0e9353b992c9cfd","abstract_canon_sha256":"effc05567b5e4034ad49d515f5c0c18c0816159165cbfbe91753ff56c3b0d39c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T00:08:33.644061Z","signature_b64":"yMfpCfZBTq6MYQlGVNahneq1ER0IoXP3RWEFvYD5VTne4axFht9Q9e+aASArnAn3UhvrxN6xLUpgZ9QLMlT+BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"862dce25e071d7896f3da17d10afdecd17823655c85820ef6dfd0d376272c0ae","last_reissued_at":"2026-06-10T00:08:33.643145Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T00:08:33.643145Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"When RL Fails after SFT: Rejuvenating Model Plasticity for Robust SFT-to-RL Handoff","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jiashun Liu, Ling Pan, Runze Liu, Xu Wan, Yuqian Fu","submitted_at":"2026-06-07T17:58:58Z","abstract_excerpt":"Supervised Fine-Tuning (SFT) followed by Reinforcement Learning (RL) has become a standard pipeline for Large Language Model (LLM) post-training. SFT is expected to provide a useful behavioral prior for RL to further enhance model capabilities. However, checkpoints with excessive SFT often show limited improvement during RL. We attribute this failure to the loss of model plasticity: the reduced ability of an SFT-initialized policy to be effectively reshaped by subsequent RL. To better understand this phenomenon, we conduct detailed analysis from multiple perspectives, including parameter chang"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.09932","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.09932/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.09932","created_at":"2026-06-10T00:08:33.643295+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.09932v1","created_at":"2026-06-10T00:08:33.643295+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.09932","created_at":"2026-06-10T00:08:33.643295+00:00"},{"alias_kind":"pith_short_12","alias_value":"QYW44JPAOHLY","created_at":"2026-06-10T00:08:33.643295+00:00"},{"alias_kind":"pith_short_16","alias_value":"QYW44JPAOHLYS3Z5","created_at":"2026-06-10T00:08:33.643295+00:00"},{"alias_kind":"pith_short_8","alias_value":"QYW44JPA","created_at":"2026-06-10T00:08:33.643295+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU","json":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU.json","graph_json":"https://pith.science/api/pith-number/QYW44JPAOHLYS3Z5UF6RBL66ZU/graph.json","events_json":"https://pith.science/api/pith-number/QYW44JPAOHLYS3Z5UF6RBL66ZU/events.json","paper":"https://pith.science/paper/QYW44JPA"},"agent_actions":{"view_html":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU","download_json":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU.json","view_paper":"https://pith.science/paper/QYW44JPA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.09932&json=true","fetch_graph":"https://pith.science/api/pith-number/QYW44JPAOHLYS3Z5UF6RBL66ZU/graph.json","fetch_events":"https://pith.science/api/pith-number/QYW44JPAOHLYS3Z5UF6RBL66ZU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU/action/storage_attestation","attest_author":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU/action/author_attestation","sign_citation":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU/action/citation_signature","submit_replication":"https://pith.science/pith/QYW44JPAOHLYS3Z5UF6RBL66ZU/action/replication_record"}},"created_at":"2026-06-10T00:08:33.643295+00:00","updated_at":"2026-06-10T00:08:33.643295+00:00"}