{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2022:CCOC2QU6YLMXKBVFHIF652HVVI","short_pith_number":"pith:CCOC2QU6","schema_version":"1.0","canonical_sha256":"109c2d429ec2d97506a53a0beee8f5aa0aa1216e67e5eb794422aec991a612c5","source":{"kind":"arxiv","id":"2204.05893","version":2},"attestation_state":"computed","paper":{"title":"Forgetting and Imbalance in Robot Lifelong Learning with Off-policy Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.RO","authors_text":"Abbas Abdolmaleki, Dushyant Rao, Jan Humplik, Markus Wulfmeier, Nicolas Heess, Steven Bohez, Tuomas Haarnoja, Wenxuan Zhou","submitted_at":"2022-04-12T15:40:53Z","abstract_excerpt":"Robots will experience non-stationary environment dynamics throughout their lifetime: the robot dynamics can change due to wear and tear, or its surroundings may change over time. Eventually, the robots should perform well in all of the environment variations it has encountered. At the same time, it should still be able to learn fast in a new environment. We identify two challenges in Reinforcement Learning (RL) under such a lifelong learning setting with off-policy data: first, existing off-policy algorithms struggle with the trade-off between being conservative to maintain good performance i"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2204.05893","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2022-04-12T15:40:53Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"f0850b2774cc22d57dcc9295c32ab7b33069b2c481247972ffc99aa508ae8728","abstract_canon_sha256":"6336c59cfb7e34bb9fd01b473b4e2f2d9c836cb86e7011550ae53cf3258549f8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T04:49:31.841601Z","signature_b64":"1poF0PxyPh344OMLNfw4bFXNWO7tNq6ANDYScKu+/XgnNz9H59SnxBypxF653v/Nf0o61A8QeS3fk52dINjIBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"109c2d429ec2d97506a53a0beee8f5aa0aa1216e67e5eb794422aec991a612c5","last_reissued_at":"2026-07-05T04:49:31.841216Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T04:49:31.841216Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Forgetting and Imbalance in Robot Lifelong Learning with Off-policy Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.RO","authors_text":"Abbas Abdolmaleki, Dushyant Rao, Jan Humplik, Markus Wulfmeier, Nicolas Heess, Steven Bohez, Tuomas Haarnoja, Wenxuan Zhou","submitted_at":"2022-04-12T15:40:53Z","abstract_excerpt":"Robots will experience non-stationary environment dynamics throughout their lifetime: the robot dynamics can change due to wear and tear, or its surroundings may change over time. Eventually, the robots should perform well in all of the environment variations it has encountered. At the same time, it should still be able to learn fast in a new environment. We identify two challenges in Reinforcement Learning (RL) under such a lifelong learning setting with off-policy data: first, existing off-policy algorithms struggle with the trade-off between being conservative to maintain good performance i"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2204.05893","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2204.05893/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2204.05893","created_at":"2026-07-05T04:49:31.841284+00:00"},{"alias_kind":"arxiv_version","alias_value":"2204.05893v2","created_at":"2026-07-05T04:49:31.841284+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2204.05893","created_at":"2026-07-05T04:49:31.841284+00:00"},{"alias_kind":"pith_short_12","alias_value":"CCOC2QU6YLMX","created_at":"2026-07-05T04:49:31.841284+00:00"},{"alias_kind":"pith_short_16","alias_value":"CCOC2QU6YLMXKBVF","created_at":"2026-07-05T04:49:31.841284+00:00"},{"alias_kind":"pith_short_8","alias_value":"CCOC2QU6","created_at":"2026-07-05T04:49:31.841284+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI","json":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI.json","graph_json":"https://pith.science/api/pith-number/CCOC2QU6YLMXKBVFHIF652HVVI/graph.json","events_json":"https://pith.science/api/pith-number/CCOC2QU6YLMXKBVFHIF652HVVI/events.json","paper":"https://pith.science/paper/CCOC2QU6"},"agent_actions":{"view_html":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI","download_json":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI.json","view_paper":"https://pith.science/paper/CCOC2QU6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2204.05893&json=true","fetch_graph":"https://pith.science/api/pith-number/CCOC2QU6YLMXKBVFHIF652HVVI/graph.json","fetch_events":"https://pith.science/api/pith-number/CCOC2QU6YLMXKBVFHIF652HVVI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI/action/storage_attestation","attest_author":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI/action/author_attestation","sign_citation":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI/action/citation_signature","submit_replication":"https://pith.science/pith/CCOC2QU6YLMXKBVFHIF652HVVI/action/replication_record"}},"created_at":"2026-07-05T04:49:31.841284+00:00","updated_at":"2026-07-05T04:49:31.841284+00:00"}