{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:RX7QBDNDP4LMNBZV7DVGOHQLGN","short_pith_number":"pith:RX7QBDND","schema_version":"1.0","canonical_sha256":"8dff008da37f16c68735f8ea671e0b337290ae6ae1b653794f1efde4f26b7c9c","source":{"kind":"arxiv","id":"2603.08561","version":6},"attestation_state":"computed","paper":{"title":"RetroAgent: From Solving to Evolving via Retrospective Dual Intrinsic Feedback","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Wenqi Shao, Xia Hu, Xiaoying Zhang, Yipeng Zhang, Zichen Liu","submitted_at":"2026-03-09T16:23:33Z","abstract_excerpt":"Standard reinforcement learning (RL) for large language model (LLM) agents primarily optimizes extrinsic task rewards, often favoring isolated task completion over continual adaptation. This paradigm can cause premature convergence to suboptimal policies and leaves useful experience only implicitly encoded in model parameters, limiting its retrieval and reuse for future decisions. We introduce RetroAgent, an online RL framework that trains agents to master interactive environments not merely by solving tasks, but by evolving across episodes. Inspired by human retrospective self-improvement, Re"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.08561","kind":"arxiv","version":6},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-09T16:23:33Z","cross_cats_sorted":[],"title_canon_sha256":"a49aa9a10dfccef67588d36858d226a7720ee81b28ba216be5fd1773ca3cbe30","abstract_canon_sha256":"98c836dca69a8d56efc728236eaa7575b4c368ab0a19c8de6c8e5c2ab2ff65ea"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:09:58.194381Z","signature_b64":"3ZjX2EhGwwGgZ8avE+wXqw5KiI3+bWS05RJ+AWmB057ioKH30hdzbrH5+oZ2vjmdHIfsBCUpXzyJ8uhF4uwBCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8dff008da37f16c68735f8ea671e0b337290ae6ae1b653794f1efde4f26b7c9c","last_reissued_at":"2026-06-10T01:09:58.193244Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:09:58.193244Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RetroAgent: From Solving to Evolving via Retrospective Dual Intrinsic Feedback","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Wenqi Shao, Xia Hu, Xiaoying Zhang, Yipeng Zhang, Zichen Liu","submitted_at":"2026-03-09T16:23:33Z","abstract_excerpt":"Standard reinforcement learning (RL) for large language model (LLM) agents primarily optimizes extrinsic task rewards, often favoring isolated task completion over continual adaptation. This paradigm can cause premature convergence to suboptimal policies and leaves useful experience only implicitly encoded in model parameters, limiting its retrieval and reuse for future decisions. We introduce RetroAgent, an online RL framework that trains agents to master interactive environments not merely by solving tasks, but by evolving across episodes. Inspired by human retrospective self-improvement, Re"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.08561","kind":"arxiv","version":6},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.08561/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.08561","created_at":"2026-06-10T01:09:58.193419+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.08561v6","created_at":"2026-06-10T01:09:58.193419+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.08561","created_at":"2026-06-10T01:09:58.193419+00:00"},{"alias_kind":"pith_short_12","alias_value":"RX7QBDNDP4LM","created_at":"2026-06-10T01:09:58.193419+00:00"},{"alias_kind":"pith_short_16","alias_value":"RX7QBDNDP4LMNBZV","created_at":"2026-06-10T01:09:58.193419+00:00"},{"alias_kind":"pith_short_8","alias_value":"RX7QBDND","created_at":"2026-06-10T01:09:58.193419+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"2605.06130","citing_title":"Skill1: Unified Evolution of Skill-Augmented Agents via Reinforcement Learning","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10663","citing_title":"Evolving-RL: End-to-End Optimization of Experience-Driven Self-Evolving Capability within Agents","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06130","citing_title":"Skill1: Unified Evolution of Skill-Augmented Agents via Reinforcement Learning","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06130","citing_title":"Skill1: Unified Evolution of Skill-Augmented Agents via Reinforcement Learning","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08013","citing_title":"Learning CLI Agents with Structured Action Credit under Selective Observation","ref_index":74,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN","json":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN.json","graph_json":"https://pith.science/api/pith-number/RX7QBDNDP4LMNBZV7DVGOHQLGN/graph.json","events_json":"https://pith.science/api/pith-number/RX7QBDNDP4LMNBZV7DVGOHQLGN/events.json","paper":"https://pith.science/paper/RX7QBDND"},"agent_actions":{"view_html":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN","download_json":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN.json","view_paper":"https://pith.science/paper/RX7QBDND","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.08561&json=true","fetch_graph":"https://pith.science/api/pith-number/RX7QBDNDP4LMNBZV7DVGOHQLGN/graph.json","fetch_events":"https://pith.science/api/pith-number/RX7QBDNDP4LMNBZV7DVGOHQLGN/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN/action/storage_attestation","attest_author":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN/action/author_attestation","sign_citation":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN/action/citation_signature","submit_replication":"https://pith.science/pith/RX7QBDNDP4LMNBZV7DVGOHQLGN/action/replication_record"}},"created_at":"2026-06-10T01:09:58.193419+00:00","updated_at":"2026-06-10T01:09:58.193419+00:00"}