{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:KM3JL6PWGUWSP6GUPSH3IED6II","short_pith_number":"pith:KM3JL6PW","schema_version":"1.0","canonical_sha256":"533695f9f6352d27f8d47c8fb4107e423fee785b099c2dd33daf6d686674f3dc","source":{"kind":"arxiv","id":"2511.14460","version":2},"attestation_state":"computed","paper":{"title":"Agent-R1: A Unified and Modular Framework for Agentic Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Daoyu Wang, Enhong Chen, Jie Ouyang, Mingyue Cheng, Qi Liu, Qingchuan Li, Shuo Yu, Xiaoyu Tao, Yitong Zhou, Yucong Luo","submitted_at":"2025-11-18T13:03:15Z","abstract_excerpt":"Large language models (LLMs) have rapidly evolved from single-turn text generators into the foundation of increasingly capable agents. As these agents take on more complex reasoning, decision making, tool use, and long-horizon tasks, reinforcement learning (RL) is becoming increasingly important for shaping their behavior. This shift is especially visible in agentic RL, where models must interact with tools and environments across multiple rounds rather than produce a single standalone response. In this regime, the usual view of a trajectory as one ever-growing token sequence becomes increasin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2511.14460","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-11-18T13:03:15Z","cross_cats_sorted":[],"title_canon_sha256":"b984c763acf39acfc26de185b41542d7f4cf259bf15f05e466004ffc18d4fa9f","abstract_canon_sha256":"8a1618150df633bf4c32b47a19af34a91f99b197e8cfd8b242c2339e8c92a30d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:38.582499Z","signature_b64":"G3gRPZG+//Ifxsb2n7FYH346mWRzgNmhyCgJ9Jmpd0D7bPWOW2Sz8sNYsaBxGpa3YCK5ei3BKQlwgzyFS3MlCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"533695f9f6352d27f8d47c8fb4107e423fee785b099c2dd33daf6d686674f3dc","last_reissued_at":"2026-06-02T01:03:38.581972Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:38.581972Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Agent-R1: A Unified and Modular Framework for Agentic Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Daoyu Wang, Enhong Chen, Jie Ouyang, Mingyue Cheng, Qi Liu, Qingchuan Li, Shuo Yu, Xiaoyu Tao, Yitong Zhou, Yucong Luo","submitted_at":"2025-11-18T13:03:15Z","abstract_excerpt":"Large language models (LLMs) have rapidly evolved from single-turn text generators into the foundation of increasingly capable agents. As these agents take on more complex reasoning, decision making, tool use, and long-horizon tasks, reinforcement learning (RL) is becoming increasingly important for shaping their behavior. This shift is especially visible in agentic RL, where models must interact with tools and environments across multiple rounds rather than produce a single standalone response. In this regime, the usual view of a trajectory as one ever-growing token sequence becomes increasin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2511.14460","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2511.14460/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.14460","created_at":"2026-06-02T01:03:38.582034+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.14460v2","created_at":"2026-06-02T01:03:38.582034+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.14460","created_at":"2026-06-02T01:03:38.582034+00:00"},{"alias_kind":"pith_short_12","alias_value":"KM3JL6PWGUWS","created_at":"2026-06-02T01:03:38.582034+00:00"},{"alias_kind":"pith_short_16","alias_value":"KM3JL6PWGUWSP6GU","created_at":"2026-06-02T01:03:38.582034+00:00"},{"alias_kind":"pith_short_8","alias_value":"KM3JL6PW","created_at":"2026-06-02T01:03:38.582034+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":9,"internal_anchor_count":9,"sample":[{"citing_arxiv_id":"2605.14133","citing_title":"ClawForge: Generating Executable Interactive Benchmarks for Command-Line Agents","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2512.00520","citing_title":"Toward a Safe Internet of Agents","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14133","citing_title":"ClawForge: Generating Executable Interactive Benchmarks for Command-Line Agents","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2603.24935","citing_title":"SABER: A Stealthy Agentic Black-Box Attack Framework for Vision-Language-Action Models","ref_index":32,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18401","citing_title":"StepPO: Step-Aligned Policy Optimization for Agentic Reinforcement Learning","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2604.10674","citing_title":"Skill-SD: Skill-Conditioned Self-Distillation for Multi-turn LLM Agents","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07339","citing_title":"Tools as Continuous Flow for Evolving Agentic Reasoning","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07927","citing_title":"EigentSearch-Q+: Enhancing Deep Research Agents with Structured Reasoning Tools","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06734","citing_title":"TEC: A Collection of Human Trial-and-error Trajectories for Problem Solving","ref_index":7,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II","json":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II.json","graph_json":"https://pith.science/api/pith-number/KM3JL6PWGUWSP6GUPSH3IED6II/graph.json","events_json":"https://pith.science/api/pith-number/KM3JL6PWGUWSP6GUPSH3IED6II/events.json","paper":"https://pith.science/paper/KM3JL6PW"},"agent_actions":{"view_html":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II","download_json":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II.json","view_paper":"https://pith.science/paper/KM3JL6PW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.14460&json=true","fetch_graph":"https://pith.science/api/pith-number/KM3JL6PWGUWSP6GUPSH3IED6II/graph.json","fetch_events":"https://pith.science/api/pith-number/KM3JL6PWGUWSP6GUPSH3IED6II/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II/action/storage_attestation","attest_author":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II/action/author_attestation","sign_citation":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II/action/citation_signature","submit_replication":"https://pith.science/pith/KM3JL6PWGUWSP6GUPSH3IED6II/action/replication_record"}},"created_at":"2026-06-02T01:03:38.582034+00:00","updated_at":"2026-06-02T01:03:38.582034+00:00"}