{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:OGPC33FFYMVU7AOHCOLYSGKUGA","short_pith_number":"pith:OGPC33FF","schema_version":"1.0","canonical_sha256":"719e2deca5c32b4f81c71397891954300188b9461e776f22764b6f8f8675396f","source":{"kind":"arxiv","id":"2504.05520","version":4},"attestation_state":"computed","paper":{"title":"Efficient Reinforcement Finetuning via Adaptive Curriculum Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Jieyu Zhao, Linxin Song, Taiwei Shi, Tianyi Zhou, Yiyang Wu","submitted_at":"2025-04-07T21:31:31Z","abstract_excerpt":"Reinforcement finetuning (RFT) has shown great potential for enhancing the mathematical reasoning capabilities of large language models (LLMs), but it is often sample- and compute-inefficient, requiring extensive training. In this work, we introduce AdaRFT (Adaptive Curriculum Reinforcement Finetuning), a method that significantly improves the efficiency of RFT through adaptive curriculum learning. AdaRFT dynamically adjusts the difficulty of training problems based on the model's recent reward signals, ensuring that the model consistently trains on tasks that are challenging but solvable. Thi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2504.05520","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-04-07T21:31:31Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"5958956eaae622b8906b00b75373a53063848a853c258c32efa331fe963e4b0d","abstract_canon_sha256":"fb4234b8ba5bbdc0e2ae165d35b08e8026ec7e99ade82f6fb677ca346e74aef8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T02:13:14.853152Z","signature_b64":"tABYj7qkGKwu7joYUDB6KeYF9TAvNOYvDsA1izabZxcyYe2yir2RAaNYDEwtXwHihUH+Uv9Cjfss45KMj1T5Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"719e2deca5c32b4f81c71397891954300188b9461e776f22764b6f8f8675396f","last_reissued_at":"2026-06-23T02:13:14.852673Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T02:13:14.852673Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Efficient Reinforcement Finetuning via Adaptive Curriculum Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Jieyu Zhao, Linxin Song, Taiwei Shi, Tianyi Zhou, Yiyang Wu","submitted_at":"2025-04-07T21:31:31Z","abstract_excerpt":"Reinforcement finetuning (RFT) has shown great potential for enhancing the mathematical reasoning capabilities of large language models (LLMs), but it is often sample- and compute-inefficient, requiring extensive training. In this work, we introduce AdaRFT (Adaptive Curriculum Reinforcement Finetuning), a method that significantly improves the efficiency of RFT through adaptive curriculum learning. AdaRFT dynamically adjusts the difficulty of training problems based on the model's recent reward signals, ensuring that the model consistently trains on tasks that are challenging but solvable. Thi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2504.05520","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2504.05520/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2504.05520","created_at":"2026-06-23T02:13:14.852739+00:00"},{"alias_kind":"arxiv_version","alias_value":"2504.05520v4","created_at":"2026-06-23T02:13:14.852739+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2504.05520","created_at":"2026-06-23T02:13:14.852739+00:00"},{"alias_kind":"pith_short_12","alias_value":"OGPC33FFYMVU","created_at":"2026-06-23T02:13:14.852739+00:00"},{"alias_kind":"pith_short_16","alias_value":"OGPC33FFYMVU7AOH","created_at":"2026-06-23T02:13:14.852739+00:00"},{"alias_kind":"pith_short_8","alias_value":"OGPC33FF","created_at":"2026-06-23T02:13:14.852739+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":9,"internal_anchor_count":9,"sample":[{"citing_arxiv_id":"2602.12579","citing_title":"VI-CuRL: Stabilizing Verifier-Independent RL Reasoning via Confidence-Guided Variance Reduction","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.22567","citing_title":"LANG: Reinforcement Learning for Multilingual Reasoning with Language-Adaptive Hint Guidance","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17037","citing_title":"D$^2$Evo: Dual Difficulty-Aware Self-Evolution for Data-Efficient Reinforcement Learning","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2601.04809","citing_title":"SCALER:Synthetic Scalable Adaptive Learning Environment for Reasoning","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2603.08659","citing_title":"CODA: Difficulty-Aware Compute Allocation for Adaptive Reasoning","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11625","citing_title":"Nice Fold or Hero Call: Learning Budget-Efficient Thinking for Adaptive Reasoning","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09188","citing_title":"DARE: Difficulty-Adaptive Reinforcement Learning with Co-Evolved Difficulty Estimation","ref_index":45,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24005","citing_title":"TCOD: Exploring Temporal Curriculum in On-Policy Distillation for Multi-turn Autonomous Agents","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00015","citing_title":"TimeRFT: Stimulating Generalizable Time Series Forecasting for TSFMs via Reinforcement Finetuning","ref_index":59,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA","json":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA.json","graph_json":"https://pith.science/api/pith-number/OGPC33FFYMVU7AOHCOLYSGKUGA/graph.json","events_json":"https://pith.science/api/pith-number/OGPC33FFYMVU7AOHCOLYSGKUGA/events.json","paper":"https://pith.science/paper/OGPC33FF"},"agent_actions":{"view_html":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA","download_json":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA.json","view_paper":"https://pith.science/paper/OGPC33FF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2504.05520&json=true","fetch_graph":"https://pith.science/api/pith-number/OGPC33FFYMVU7AOHCOLYSGKUGA/graph.json","fetch_events":"https://pith.science/api/pith-number/OGPC33FFYMVU7AOHCOLYSGKUGA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA/action/storage_attestation","attest_author":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA/action/author_attestation","sign_citation":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA/action/citation_signature","submit_replication":"https://pith.science/pith/OGPC33FFYMVU7AOHCOLYSGKUGA/action/replication_record"}},"created_at":"2026-06-23T02:13:14.852739+00:00","updated_at":"2026-06-23T02:13:14.852739+00:00"}