{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:T2T3YSUBUCAN44YWONBNR45Q5I","short_pith_number":"pith:T2T3YSUB","canonical_record":{"source":{"id":"1810.12894","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-10-30T17:44:42Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"1ab08f6ae7f04c37cf32cf0f21e35a01ad7a96be3077cfa08bdb748f2fa92de3","abstract_canon_sha256":"2f694bab426974257335b0bdaaf4903335e3d31a07e1da401c50d60cb7f00b29"},"schema_version":"1.0"},"canonical_sha256":"9ea7bc4a81a080de73167342d8f3b0ea0d36579ca63c2c64eeb9a042cc4a617b","source":{"kind":"arxiv","id":"1810.12894","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.12894","created_at":"2026-05-18T00:01:53Z"},{"alias_kind":"arxiv_version","alias_value":"1810.12894v1","created_at":"2026-05-18T00:01:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.12894","created_at":"2026-05-18T00:01:53Z"},{"alias_kind":"pith_short_12","alias_value":"T2T3YSUBUCAN","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"T2T3YSUBUCAN44YW","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"T2T3YSUB","created_at":"2026-05-18T12:32:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:T2T3YSUBUCAN44YWONBNR45Q5I","target":"record","payload":{"canonical_record":{"source":{"id":"1810.12894","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-10-30T17:44:42Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"1ab08f6ae7f04c37cf32cf0f21e35a01ad7a96be3077cfa08bdb748f2fa92de3","abstract_canon_sha256":"2f694bab426974257335b0bdaaf4903335e3d31a07e1da401c50d60cb7f00b29"},"schema_version":"1.0"},"canonical_sha256":"9ea7bc4a81a080de73167342d8f3b0ea0d36579ca63c2c64eeb9a042cc4a617b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:53.903424Z","signature_b64":"FUJGDkoWsJth2jOPUtVRnFVRI0I4kOna7wlWv0lfQeiDA56+Ty3Py3BNK/dfTFnWCKLC+Drq1qLnaILVqB05DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9ea7bc4a81a080de73167342d8f3b0ea0d36579ca63c2c64eeb9a042cc4a617b","last_reissued_at":"2026-05-18T00:01:53.902962Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:53.902962Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1810.12894","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9GnHsoU8cVtN1d7xCUISKdlJ5Hyt9HEvEuerYvZ4/uZP2yQYMwmVLfgbNDpCkDOPCIDUWtzlq6fQ3FH3ILgNCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-20T10:38:47.727748Z"},"content_sha256":"96f03ea820d2f1ae7e31657312304d3dd1a6b47800b63fc545cc70dfde10bd31","schema_version":"1.0","event_id":"sha256:96f03ea820d2f1ae7e31657312304d3dd1a6b47800b63fc545cc70dfde10bd31"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:T2T3YSUBUCAN44YWONBNR45Q5I","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Exploration by Random Network Distillation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Amos Storkey, Harrison Edwards, Oleg Klimov, Yuri Burda","submitted_at":"2018-10-30T17:44:42Z","abstract_excerpt":"We introduce an exploration bonus for deep reinforcement learning methods that is easy to implement and adds minimal overhead to the computation performed. The bonus is the error of a neural network predicting features of the observations given by a fixed randomly initialized neural network. We also introduce a method to flexibly combine intrinsic and extrinsic rewards. We find that the random network distillation (RND) bonus combined with this increased flexibility enables significant progress on several hard exploration Atari games. In particular we establish state of the art performance on "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.12894","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sioUWgCRaDNvzVPs3SXMywK+wq9QkCzUyfeHg9oimezE34qg6q3OIb9itaV4cVzQ6dlJXnwA1yI69IYtTfVGBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-20T10:38:47.728094Z"},"content_sha256":"aedfece749ff6fbf279a79121c2b6f95236000d51b876f79af3c477784d4bace","schema_version":"1.0","event_id":"sha256:aedfece749ff6fbf279a79121c2b6f95236000d51b876f79af3c477784d4bace"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/bundle.json","state_url":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/T2T3YSUBUCAN44YWONBNR45Q5I/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-20T10:38:47Z","links":{"resolver":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I","bundle":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/bundle.json","state":"https://pith.science/pith/T2T3YSUBUCAN44YWONBNR45Q5I/state.json","well_known_bundle":"https://pith.science/.well-known/pith/T2T3YSUBUCAN44YWONBNR45Q5I/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:T2T3YSUBUCAN44YWONBNR45Q5I","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2f694bab426974257335b0bdaaf4903335e3d31a07e1da401c50d60cb7f00b29","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-10-30T17:44:42Z","title_canon_sha256":"1ab08f6ae7f04c37cf32cf0f21e35a01ad7a96be3077cfa08bdb748f2fa92de3"},"schema_version":"1.0","source":{"id":"1810.12894","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.12894","created_at":"2026-05-18T00:01:53Z"},{"alias_kind":"arxiv_version","alias_value":"1810.12894v1","created_at":"2026-05-18T00:01:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.12894","created_at":"2026-05-18T00:01:53Z"},{"alias_kind":"pith_short_12","alias_value":"T2T3YSUBUCAN","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"T2T3YSUBUCAN44YW","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"T2T3YSUB","created_at":"2026-05-18T12:32:53Z"}],"graph_snapshots":[{"event_id":"sha256:aedfece749ff6fbf279a79121c2b6f95236000d51b876f79af3c477784d4bace","target":"graph","created_at":"2026-05-18T00:01:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce an exploration bonus for deep reinforcement learning methods that is easy to implement and adds minimal overhead to the computation performed. The bonus is the error of a neural network predicting features of the observations given by a fixed randomly initialized neural network. We also introduce a method to flexibly combine intrinsic and extrinsic rewards. We find that the random network distillation (RND) bonus combined with this increased flexibility enables significant progress on several hard exploration Atari games. In particular we establish state of the art performance on ","authors_text":"Amos Storkey, Harrison Edwards, Oleg Klimov, Yuri Burda","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-10-30T17:44:42Z","title":"Exploration by Random Network Distillation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.12894","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:96f03ea820d2f1ae7e31657312304d3dd1a6b47800b63fc545cc70dfde10bd31","target":"record","created_at":"2026-05-18T00:01:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2f694bab426974257335b0bdaaf4903335e3d31a07e1da401c50d60cb7f00b29","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-10-30T17:44:42Z","title_canon_sha256":"1ab08f6ae7f04c37cf32cf0f21e35a01ad7a96be3077cfa08bdb748f2fa92de3"},"schema_version":"1.0","source":{"id":"1810.12894","kind":"arxiv","version":1}},"canonical_sha256":"9ea7bc4a81a080de73167342d8f3b0ea0d36579ca63c2c64eeb9a042cc4a617b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9ea7bc4a81a080de73167342d8f3b0ea0d36579ca63c2c64eeb9a042cc4a617b","first_computed_at":"2026-05-18T00:01:53.902962Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:01:53.902962Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FUJGDkoWsJth2jOPUtVRnFVRI0I4kOna7wlWv0lfQeiDA56+Ty3Py3BNK/dfTFnWCKLC+Drq1qLnaILVqB05DA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:01:53.903424Z","signed_message":"canonical_sha256_bytes"},"source_id":"1810.12894","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:96f03ea820d2f1ae7e31657312304d3dd1a6b47800b63fc545cc70dfde10bd31","sha256:aedfece749ff6fbf279a79121c2b6f95236000d51b876f79af3c477784d4bace"],"state_sha256":"24ab388e019c1d4195900f95d93f895b5ee53b3708ecc9f61862dddd1f97d08c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QH0wlHJw4xamNWrVWX6VRXtaar9f9e3WLLE1MTdxRrmrn5/CMcbsp2EGmuGIh4XpM1pbj7bN5U8KHHmepvRYDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-20T10:38:47.730069Z","bundle_sha256":"2495bd14e18b4b23f34917363791706b4e2f5ffd36cc57a20c4c7f545f7817ad"}}