{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:CRXVDAUCS5WRTEBA6LK6DKUUHH","short_pith_number":"pith:CRXVDAUC","canonical_record":{"source":{"id":"2602.01058","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T06:53:45Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"95b1798664bb53f843d3b7325588847518f5e782a49c5da424cb015d7863f8d8","abstract_canon_sha256":"69ec8a600551900182b9112d7e90428cbbe87afe453d25e6b493a7349e229de6"},"schema_version":"1.0"},"canonical_sha256":"146f518282976d199020f2d5e1aa9439ed848d31a88ef701a12920ac0875e5c5","source":{"kind":"arxiv","id":"2602.01058","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.01058","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"arxiv_version","alias_value":"2602.01058v2","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.01058","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"pith_short_12","alias_value":"CRXVDAUCS5WR","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"pith_short_16","alias_value":"CRXVDAUCS5WRTEBA","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"pith_short_8","alias_value":"CRXVDAUC","created_at":"2026-05-29T01:05:03Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:CRXVDAUCS5WRTEBA6LK6DKUUHH","target":"record","payload":{"canonical_record":{"source":{"id":"2602.01058","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T06:53:45Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"95b1798664bb53f843d3b7325588847518f5e782a49c5da424cb015d7863f8d8","abstract_canon_sha256":"69ec8a600551900182b9112d7e90428cbbe87afe453d25e6b493a7349e229de6"},"schema_version":"1.0"},"canonical_sha256":"146f518282976d199020f2d5e1aa9439ed848d31a88ef701a12920ac0875e5c5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:05:03.090229Z","signature_b64":"2rp40GmCKmKcfvJZqMuiKxBl+gRdMH+tIliv5vpHwXan66rVvIBygYuTz+5VOojTvh0R2//ncgoZr69OSwieAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"146f518282976d199020f2d5e1aa9439ed848d31a88ef701a12920ac0875e5c5","last_reissued_at":"2026-05-29T01:05:03.089153Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:05:03.089153Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.01058","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:05:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UJByKDSPYZ6tgUZxAiQYhchQ15qpSpoehVH9dS2kLzno7hOQJp/8fOe0I1ciHbfrokdMvQrAd2PnRnCVWTnVAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T05:12:32.580282Z"},"content_sha256":"87aed00a0d7a0467fd4243da1ccc9091682e62c1762841e277285b105391c608","schema_version":"1.0","event_id":"sha256:87aed00a0d7a0467fd4243da1ccc9091682e62c1762841e277285b105391c608"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:CRXVDAUCS5WRTEBA6LK6DKUUHH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Good SFT Optimizes for SFT, Better SFT Prepares for Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Dylan Zhang, Haojin Wang, Hao Peng, Qingzhi Chen, Yufeng Xu","submitted_at":"2026-02-01T06:53:45Z","abstract_excerpt":"Post-training of reasoning LLMs is a holistic process that typically consists of an offline SFT stage followed by an online reinforcement learning (RL) stage. However, SFT is often optimized in isolation to maximize SFT performance alone.\n  We show that, after identical RL training, models initialized from stronger SFT checkpoints can significantly underperform those initialized from weaker ones. We attribute this to a mismatch typical in current SFT-RL pipelines: the distribution that generates the offline SFT data can differ substantially from the policy optimized during online RL, which lea"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.01058","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.01058/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:05:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vPYOY18wFAlabuw10XwZy2jnh8eWLvc+s1EXIAzgj30gw3WfB6uEQmv77j2EoTG/UxSU1whYdUW0VUgF25gXBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T05:12:32.580699Z"},"content_sha256":"48cb442eacd334ecdc0eed2821c24384b525b8e294d1f10a1fd6dbc0d3efe831","schema_version":"1.0","event_id":"sha256:48cb442eacd334ecdc0eed2821c24384b525b8e294d1f10a1fd6dbc0d3efe831"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CRXVDAUCS5WRTEBA6LK6DKUUHH/bundle.json","state_url":"https://pith.science/pith/CRXVDAUCS5WRTEBA6LK6DKUUHH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CRXVDAUCS5WRTEBA6LK6DKUUHH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T05:12:32Z","links":{"resolver":"https://pith.science/pith/CRXVDAUCS5WRTEBA6LK6DKUUHH","bundle":"https://pith.science/pith/CRXVDAUCS5WRTEBA6LK6DKUUHH/bundle.json","state":"https://pith.science/pith/CRXVDAUCS5WRTEBA6LK6DKUUHH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CRXVDAUCS5WRTEBA6LK6DKUUHH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:CRXVDAUCS5WRTEBA6LK6DKUUHH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"69ec8a600551900182b9112d7e90428cbbe87afe453d25e6b493a7349e229de6","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T06:53:45Z","title_canon_sha256":"95b1798664bb53f843d3b7325588847518f5e782a49c5da424cb015d7863f8d8"},"schema_version":"1.0","source":{"id":"2602.01058","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.01058","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"arxiv_version","alias_value":"2602.01058v2","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.01058","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"pith_short_12","alias_value":"CRXVDAUCS5WR","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"pith_short_16","alias_value":"CRXVDAUCS5WRTEBA","created_at":"2026-05-29T01:05:03Z"},{"alias_kind":"pith_short_8","alias_value":"CRXVDAUC","created_at":"2026-05-29T01:05:03Z"}],"graph_snapshots":[{"event_id":"sha256:48cb442eacd334ecdc0eed2821c24384b525b8e294d1f10a1fd6dbc0d3efe831","target":"graph","created_at":"2026-05-29T01:05:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.01058/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Post-training of reasoning LLMs is a holistic process that typically consists of an offline SFT stage followed by an online reinforcement learning (RL) stage. However, SFT is often optimized in isolation to maximize SFT performance alone.\n  We show that, after identical RL training, models initialized from stronger SFT checkpoints can significantly underperform those initialized from weaker ones. We attribute this to a mismatch typical in current SFT-RL pipelines: the distribution that generates the offline SFT data can differ substantially from the policy optimized during online RL, which lea","authors_text":"Dylan Zhang, Haojin Wang, Hao Peng, Qingzhi Chen, Yufeng Xu","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T06:53:45Z","title":"Good SFT Optimizes for SFT, Better SFT Prepares for Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.01058","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:87aed00a0d7a0467fd4243da1ccc9091682e62c1762841e277285b105391c608","target":"record","created_at":"2026-05-29T01:05:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"69ec8a600551900182b9112d7e90428cbbe87afe453d25e6b493a7349e229de6","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T06:53:45Z","title_canon_sha256":"95b1798664bb53f843d3b7325588847518f5e782a49c5da424cb015d7863f8d8"},"schema_version":"1.0","source":{"id":"2602.01058","kind":"arxiv","version":2}},"canonical_sha256":"146f518282976d199020f2d5e1aa9439ed848d31a88ef701a12920ac0875e5c5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"146f518282976d199020f2d5e1aa9439ed848d31a88ef701a12920ac0875e5c5","first_computed_at":"2026-05-29T01:05:03.089153Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T01:05:03.089153Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2rp40GmCKmKcfvJZqMuiKxBl+gRdMH+tIliv5vpHwXan66rVvIBygYuTz+5VOojTvh0R2//ncgoZr69OSwieAg==","signature_status":"signed_v1","signed_at":"2026-05-29T01:05:03.090229Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.01058","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:87aed00a0d7a0467fd4243da1ccc9091682e62c1762841e277285b105391c608","sha256:48cb442eacd334ecdc0eed2821c24384b525b8e294d1f10a1fd6dbc0d3efe831"],"state_sha256":"26fd8ce562c9859cc248123d5f9f06179ed06496b1baac1efc902d726d6bd092"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AtD2Xm5IXP3xxIwfARNkJ6EJw3n6HuqJbOI7BwiHIzqBUssLGxpGBxpCdqe1u9uHqfdtvxMBV3FEt6EI2xUuBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T05:12:32.583136Z","bundle_sha256":"28ce8356909bf3c6b8061591e9319af2bbf3b8c5b8133e4ac665274c7d80aef2"}}