{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:PMKALVCFNTQ7GGFPKVJRAJ74OY","short_pith_number":"pith:PMKALVCF","canonical_record":{"source":{"id":"1809.02070","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-06T16:08:39Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"7a83d3e5f5474af719bc109823a450bce85c90b8d08bb8755e70eafc5f359cc9","abstract_canon_sha256":"96740674c4c13985648f65ef572aa529fdf5c11df5b54974623b5166f3df4b42"},"schema_version":"1.0"},"canonical_sha256":"7b1405d4456ce1f318af55531027fc763b0b1e6010679f7bd64201cd95dc6142","source":{"kind":"arxiv","id":"1809.02070","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.02070","created_at":"2026-05-18T00:06:17Z"},{"alias_kind":"arxiv_version","alias_value":"1809.02070v2","created_at":"2026-05-18T00:06:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.02070","created_at":"2026-05-18T00:06:17Z"},{"alias_kind":"pith_short_12","alias_value":"PMKALVCFNTQ7","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"PMKALVCFNTQ7GGFP","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"PMKALVCF","created_at":"2026-05-18T12:32:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:PMKALVCFNTQ7GGFPKVJRAJ74OY","target":"record","payload":{"canonical_record":{"source":{"id":"1809.02070","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-06T16:08:39Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"7a83d3e5f5474af719bc109823a450bce85c90b8d08bb8755e70eafc5f359cc9","abstract_canon_sha256":"96740674c4c13985648f65ef572aa529fdf5c11df5b54974623b5166f3df4b42"},"schema_version":"1.0"},"canonical_sha256":"7b1405d4456ce1f318af55531027fc763b0b1e6010679f7bd64201cd95dc6142","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:06:17.870480Z","signature_b64":"wUx8GsgpfSGE8FLqGRk5eUJ49Ljc7KZDjmj59qy9uXR7ayINBQ6HJiFXQaL+5l7TXLsnWSRbvzEiK+X0PUBBCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7b1405d4456ce1f318af55531027fc763b0b1e6010679f7bd64201cd95dc6142","last_reissued_at":"2026-05-18T00:06:17.870099Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:06:17.870099Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1809.02070","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"J6n2ImtGdM8EzDTbZNQDHr10s8QaI3Rm7OFVIE80XW0UzU6ZUYGTr1oW/W7O9vS9UNdpfMlNTa9QXtMaKsKrBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T18:17:19.840737Z"},"content_sha256":"ae6e6a0a22a0435cd7b9408c8e1b2c1651c6f63a13ba82430d4e6d7abf4d13aa","schema_version":"1.0","event_id":"sha256:ae6e6a0a22a0435cd7b9408c8e1b2c1651c6f63a13ba82430d4e6d7abf4d13aa"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:PMKALVCFNTQ7GGFPKVJRAJ74OY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ARCHER: Aggressive Rewards to Counter bias in Hindsight Experience Replay","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Sameera Lanka, Tianfu Wu","submitted_at":"2018-09-06T16:08:39Z","abstract_excerpt":"Experience replay is an important technique for addressing sample-inefficiency in deep reinforcement learning (RL), but faces difficulty in learning from binary and sparse rewards due to disproportionately few successful experiences in the replay buffer. Hindsight experience replay (HER) was recently proposed to tackle this difficulty by manipulating unsuccessful transitions, but in doing so, HER introduces a significant bias in the replay buffer experiences and therefore achieves a suboptimal improvement in sample-efficiency. In this paper, we present an analysis on the source of bias in HER,"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.02070","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:06:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xAVpBSLg4Bl5Ye8uu0m4GO0xvEpzNFMcWs3Viab/4XJEe5k2hY+E5sM6USjT5J52dEeQQ6mjOagvF5/ZaX6aAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T18:17:19.841071Z"},"content_sha256":"92b8880275880ea2d6a333b61c3f59d9f36af4204fa05401f2c9f7de800c1efa","schema_version":"1.0","event_id":"sha256:92b8880275880ea2d6a333b61c3f59d9f36af4204fa05401f2c9f7de800c1efa"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PMKALVCFNTQ7GGFPKVJRAJ74OY/bundle.json","state_url":"https://pith.science/pith/PMKALVCFNTQ7GGFPKVJRAJ74OY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PMKALVCFNTQ7GGFPKVJRAJ74OY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T18:17:19Z","links":{"resolver":"https://pith.science/pith/PMKALVCFNTQ7GGFPKVJRAJ74OY","bundle":"https://pith.science/pith/PMKALVCFNTQ7GGFPKVJRAJ74OY/bundle.json","state":"https://pith.science/pith/PMKALVCFNTQ7GGFPKVJRAJ74OY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PMKALVCFNTQ7GGFPKVJRAJ74OY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:PMKALVCFNTQ7GGFPKVJRAJ74OY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"96740674c4c13985648f65ef572aa529fdf5c11df5b54974623b5166f3df4b42","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-06T16:08:39Z","title_canon_sha256":"7a83d3e5f5474af719bc109823a450bce85c90b8d08bb8755e70eafc5f359cc9"},"schema_version":"1.0","source":{"id":"1809.02070","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.02070","created_at":"2026-05-18T00:06:17Z"},{"alias_kind":"arxiv_version","alias_value":"1809.02070v2","created_at":"2026-05-18T00:06:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.02070","created_at":"2026-05-18T00:06:17Z"},{"alias_kind":"pith_short_12","alias_value":"PMKALVCFNTQ7","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"PMKALVCFNTQ7GGFP","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"PMKALVCF","created_at":"2026-05-18T12:32:46Z"}],"graph_snapshots":[{"event_id":"sha256:92b8880275880ea2d6a333b61c3f59d9f36af4204fa05401f2c9f7de800c1efa","target":"graph","created_at":"2026-05-18T00:06:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Experience replay is an important technique for addressing sample-inefficiency in deep reinforcement learning (RL), but faces difficulty in learning from binary and sparse rewards due to disproportionately few successful experiences in the replay buffer. Hindsight experience replay (HER) was recently proposed to tackle this difficulty by manipulating unsuccessful transitions, but in doing so, HER introduces a significant bias in the replay buffer experiences and therefore achieves a suboptimal improvement in sample-efficiency. In this paper, we present an analysis on the source of bias in HER,","authors_text":"Sameera Lanka, Tianfu Wu","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-06T16:08:39Z","title":"ARCHER: Aggressive Rewards to Counter bias in Hindsight Experience Replay"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.02070","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ae6e6a0a22a0435cd7b9408c8e1b2c1651c6f63a13ba82430d4e6d7abf4d13aa","target":"record","created_at":"2026-05-18T00:06:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"96740674c4c13985648f65ef572aa529fdf5c11df5b54974623b5166f3df4b42","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-06T16:08:39Z","title_canon_sha256":"7a83d3e5f5474af719bc109823a450bce85c90b8d08bb8755e70eafc5f359cc9"},"schema_version":"1.0","source":{"id":"1809.02070","kind":"arxiv","version":2}},"canonical_sha256":"7b1405d4456ce1f318af55531027fc763b0b1e6010679f7bd64201cd95dc6142","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7b1405d4456ce1f318af55531027fc763b0b1e6010679f7bd64201cd95dc6142","first_computed_at":"2026-05-18T00:06:17.870099Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:06:17.870099Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wUx8GsgpfSGE8FLqGRk5eUJ49Ljc7KZDjmj59qy9uXR7ayINBQ6HJiFXQaL+5l7TXLsnWSRbvzEiK+X0PUBBCA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:06:17.870480Z","signed_message":"canonical_sha256_bytes"},"source_id":"1809.02070","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ae6e6a0a22a0435cd7b9408c8e1b2c1651c6f63a13ba82430d4e6d7abf4d13aa","sha256:92b8880275880ea2d6a333b61c3f59d9f36af4204fa05401f2c9f7de800c1efa"],"state_sha256":"36bf66f0b14bbf0063943811c80f2d5d10b4ecf3ce5cbce52df302f5840b2f01"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oyLisSqMyCzWF2OIkW9rfSzxe6ArxaVbXIpZQifSgSTATdXPgmFwcsbQT9V4c7S/+QPTnXD8h65fB96Ih7YlDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T18:17:19.842950Z","bundle_sha256":"e5fc99b5b9b449a785bbab013308650312719f680e109a9605050477c8390825"}}