{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:IIEAK72YB67YYZTRLIIH2KDEWJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"730dfc0fa178a079f7ed7e2f80a86ff8ac9f804908893c4a1853b3b8bb5d84b9","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2025-02-25T18:45:04Z","title_canon_sha256":"a91330707567ffa0ad37268e5f68258ff99c1316fd5dd035252f3e5e1a1cc008"},"schema_version":"1.0","source":{"id":"2502.18449","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2502.18449","created_at":"2026-05-17T23:38:52Z"},{"alias_kind":"arxiv_version","alias_value":"2502.18449v2","created_at":"2026-05-17T23:38:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.18449","created_at":"2026-05-17T23:38:52Z"},{"alias_kind":"pith_short_12","alias_value":"IIEAK72YB67Y","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"IIEAK72YB67YYZTR","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"IIEAK72Y","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:868f2db399c823338ebc6de114343c8b2d539a028cb602cf67b24631ffa08cea","target":"graph","created_at":"2026-05-17T23:38:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"our resulting reasoning model, Llama3-SWE-RL-70B, achieves a 41.0% solve rate on SWE-bench Verified -- a human-verified collection of real-world GitHub issues. To our knowledge, this is the best performance reported for medium-sized (<100B) LLMs to date, even comparable to leading proprietary LLMs like GPT-4o."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The assumption that a lightweight rule-based similarity score between ground-truth and generated solutions serves as an effective reward for learning genuine reasoning processes rather than superficial pattern matching."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"SWE-RL uses RL on software evolution data to train LLMs achieving 41% on SWE-bench Verified with generalization to other reasoning tasks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Reinforcement learning on open software evolution data enables LLMs to recover developer reasoning and solve 41% of real GitHub issues."}],"snapshot_sha256":"fe6533c6d9e271d0e7521ce818a7e23e44e6ebcdf982e1eb69132308f951c1fa"},"formal_canon":{"evidence_count":3,"snapshot_sha256":"5087d323b022ddc79eceb574c084178530a3aab19f5ee8cc213f8524576e8e00"},"paper":{"abstract_excerpt":"The recent DeepSeek-R1 release has demonstrated the immense potential of reinforcement learning (RL) in enhancing the general reasoning capabilities of large language models (LLMs). While DeepSeek-R1 and other follow-up work primarily focus on applying RL to competitive coding and math problems, this paper introduces SWE-RL, the first approach to scale RL-based LLM reasoning for real-world software engineering. Leveraging a lightweight rule-based reward (e.g., the similarity score between ground-truth and LLM-generated solutions), SWE-RL enables LLMs to autonomously recover a developer's reaso","authors_text":"Daniel Fried, Gabriel Synnaeve, Jade Copet, Lingming Zhang, Olivier Duchenne, Quentin Carbonneaux, Rishabh Singh, Sida I. Wang, Yuxiang Wei","cross_cats":["cs.AI","cs.CL"],"headline":"Reinforcement learning on open software evolution data enables LLMs to recover developer reasoning and solve 41% of real GitHub issues.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2025-02-25T18:45:04Z","title":"SWE-RL: Advancing LLM Reasoning via Reinforcement Learning on Open Software Evolution"},"references":{"count":192,"internal_anchors":15,"resolved_work":192,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Claude 3.5 sonnet model card addendum","work_id":"9821ab87-1805-43e6-8f4f-9a06dc3c9f37","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Raising the bar on SWE-bench Verified with Claude 3.5 Sonnet","work_id":"1e26d961-6bb1-4e30-b195-245b5a95cfb1","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Codet: Code generation with generated tests","work_id":"5034399f-3a73-4a3e-824b-0fe8fe4d82e7","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, Alex Ray, Raul Puri, Gretchen Krueger, Mich","work_id":"f06d44fc-f5c4-4ab7-951d-3eba0cbf5e88","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":6,"title":"Meta large language model compiler: Foundation models of compiler optimization, 2024","work_id":"e9ffd682-76e4-4ec3-9520-c34bf4936d2c","year":2024}],"snapshot_sha256":"baf37ceea2c212ab306189dc3981b7be94026341c1a43e50528c8a73afef1435"},"source":{"id":"2502.18449","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T10:23:06.408182Z","id":"983ad494-552f-47b5-9c12-ce1cc3d6d6fe","model_set":{"reader":"grok-4.3"},"one_line_summary":"SWE-RL uses RL on software evolution data to train LLMs achieving 41% on SWE-bench Verified with generalization to other reasoning tasks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Reinforcement learning on open software evolution data enables LLMs to recover developer reasoning and solve 41% of real GitHub issues.","strongest_claim":"our resulting reasoning model, Llama3-SWE-RL-70B, achieves a 41.0% solve rate on SWE-bench Verified -- a human-verified collection of real-world GitHub issues. To our knowledge, this is the best performance reported for medium-sized (<100B) LLMs to date, even comparable to leading proprietary LLMs like GPT-4o.","weakest_assumption":"The assumption that a lightweight rule-based similarity score between ground-truth and generated solutions serves as an effective reward for learning genuine reasoning processes rather than superficial pattern matching."}},"verdict_id":"983ad494-552f-47b5-9c12-ce1cc3d6d6fe"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f47451fdb80f9a4c7baa355737ab8f7e988e1bc5d3413a721a1f47f80a09b88b","target":"record","created_at":"2026-05-17T23:38:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"730dfc0fa178a079f7ed7e2f80a86ff8ac9f804908893c4a1853b3b8bb5d84b9","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2025-02-25T18:45:04Z","title_canon_sha256":"a91330707567ffa0ad37268e5f68258ff99c1316fd5dd035252f3e5e1a1cc008"},"schema_version":"1.0","source":{"id":"2502.18449","kind":"arxiv","version":2}},"canonical_sha256":"4208057f580fbf8c66715a107d2864b2548c8bef57c8e7cc713325e3ddea90f6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4208057f580fbf8c66715a107d2864b2548c8bef57c8e7cc713325e3ddea90f6","first_computed_at":"2026-05-17T23:38:52.780436Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:52.780436Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Q38ZOVdIVSz2Ajg6Ki1/68gdl5HvoQriAWLSgKBR5op6Uw11PtFwANVSb0XfjUsQkQSzZXMhKO8sGPpaufPCBw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:52.781007Z","signed_message":"canonical_sha256_bytes"},"source_id":"2502.18449","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f47451fdb80f9a4c7baa355737ab8f7e988e1bc5d3413a721a1f47f80a09b88b","sha256:868f2db399c823338ebc6de114343c8b2d539a028cb602cf67b24631ffa08cea"],"state_sha256":"b40ca829a949304f2cdd97b047ee2d344ddd045afd4f9ca2b8bee0c106f320b0"}