{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3OSFOGQDPAO6Z6PTMM46CR36HM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"df6fad198badefca6dcbf3deef58d549808e5cfca8465e2f0352ec3f0d8c1751","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-21T15:34:21Z","title_canon_sha256":"f08081b7a6add874c16f9f5e37247900de3133660ae63bc60dae6fdafb757672"},"schema_version":"1.0","source":{"id":"2606.23740","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.23740","created_at":"2026-06-24T00:14:24Z"},{"alias_kind":"arxiv_version","alias_value":"2606.23740v1","created_at":"2026-06-24T00:14:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.23740","created_at":"2026-06-24T00:14:24Z"},{"alias_kind":"pith_short_12","alias_value":"3OSFOGQDPAO6","created_at":"2026-06-24T00:14:24Z"},{"alias_kind":"pith_short_16","alias_value":"3OSFOGQDPAO6Z6PT","created_at":"2026-06-24T00:14:24Z"},{"alias_kind":"pith_short_8","alias_value":"3OSFOGQD","created_at":"2026-06-24T00:14:24Z"}],"graph_snapshots":[{"event_id":"sha256:c814bae9931d17def18ebae148ca8cfb565349b33b43b07df439d9f8f78ab7e8","target":"graph","created_at":"2026-06-24T00:14:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.23740/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Offline reinforcement-learning losses (RFT, RIFT, DFT, Offline GRPO, DPO) are widely used to distill reasoning from large teachers into smaller students, and are typically compared on downstream accuracy alone. We ask whether they are mechanistically distinct or converge to a similar weight update. Training six methods (SFT, RFT, DFT, RIFT, Offline GRPO, DPO) on identical math rollouts from a single base model (Qwen3-4B) with attention-only LoRA, we analyze the resulting deltas via cosine similarity, principal-angle subspace analysis, linear mode connectivity, and CKA. We observe: (i) SFT, RFT","authors_text":"Aleksandr Nikolich, Igor Kiselev, Karina Romanova, Vladimir Platonov","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-21T15:34:21Z","title":"Weight-Space Geometry of Offline Reasoning Training"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.23740","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1cfcece2b07b24b424e4d4bfebc9091f3b4866c5706a0ff8f2e3894c26b1989a","target":"record","created_at":"2026-06-24T00:14:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"df6fad198badefca6dcbf3deef58d549808e5cfca8465e2f0352ec3f0d8c1751","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-21T15:34:21Z","title_canon_sha256":"f08081b7a6add874c16f9f5e37247900de3133660ae63bc60dae6fdafb757672"},"schema_version":"1.0","source":{"id":"2606.23740","kind":"arxiv","version":1}},"canonical_sha256":"dba4571a03781decf9f36339e1477e3b2a705c8913fe51e480bf72c082a96b5e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"dba4571a03781decf9f36339e1477e3b2a705c8913fe51e480bf72c082a96b5e","first_computed_at":"2026-06-24T00:14:24.683680Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T00:14:24.683680Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KtwYwBRMgJ16xo5yJt5wBGrDeBJ9TWIsx0P88yyZG8LoexaLxuuGxGusn4oSaJel2vQqfgLIK2ZVzSPa14+lDA==","signature_status":"signed_v1","signed_at":"2026-06-24T00:14:24.684086Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.23740","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1cfcece2b07b24b424e4d4bfebc9091f3b4866c5706a0ff8f2e3894c26b1989a","sha256:c814bae9931d17def18ebae148ca8cfb565349b33b43b07df439d9f8f78ab7e8"],"state_sha256":"6a5e68061103bfb92ebff9620fa699bfb950dfeb2c7792c31cfa9a760f5db636"}