{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:L7QIIC5KICB6OGZ56DXOGVYLB5","short_pith_number":"pith:L7QIIC5K","schema_version":"1.0","canonical_sha256":"5fe0840baa4083e71b3df0eee3570b0f696a2731bec51ba3cc5f6986e4c3a057","source":{"kind":"arxiv","id":"1606.04460","version":1},"attestation_state":"computed","paper":{"title":"Model-Free Episodic Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","q-bio.NC"],"primary_cat":"stat.ML","authors_text":"Alexander Pritzel, Avraham Ruderman, Benigno Uria, Charles Blundell, Daan Wierstra, Demis Hassabis, Jack Rae, Joel Z Leibo, Yazhe Li","submitted_at":"2016-06-14T17:03:46Z","abstract_excerpt":"State of the art deep reinforcement learning algorithms take many millions of interactions to attain human-level performance. Humans, on the other hand, can very quickly exploit highly rewarding nuances of an environment upon first discovery. In the brain, such rapid learning is thought to depend on the hippocampus and its capacity for episodic memory. Here we investigate whether a simple model of hippocampal episodic control can learn to solve difficult sequential decision-making tasks. We demonstrate that it not only attains a highly rewarding strategy significantly faster than state-of-the-"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1606.04460","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-06-14T17:03:46Z","cross_cats_sorted":["cs.LG","q-bio.NC"],"title_canon_sha256":"e9a8b8b5c1a0bc6c261e5ed19d5fc66f6cab73c2bb5efab6c401b20ed8fa3ecc","abstract_canon_sha256":"0fbbdb02e1b005ee8df66fc50e3410ba4c97ccde4b2cd32cb575a0187bd37066"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:12:25.585420Z","signature_b64":"zKhVdsdL87EuLOOFc4sUzdVbsQJ0D6PVhmxRHIEQJ7VhbBO/YI4v8Iew43Rg6kGhyoCVMCY6MdYlzCjlQJDNBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5fe0840baa4083e71b3df0eee3570b0f696a2731bec51ba3cc5f6986e4c3a057","last_reissued_at":"2026-05-18T01:12:25.585059Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:12:25.585059Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Model-Free Episodic Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","q-bio.NC"],"primary_cat":"stat.ML","authors_text":"Alexander Pritzel, Avraham Ruderman, Benigno Uria, Charles Blundell, Daan Wierstra, Demis Hassabis, Jack Rae, Joel Z Leibo, Yazhe Li","submitted_at":"2016-06-14T17:03:46Z","abstract_excerpt":"State of the art deep reinforcement learning algorithms take many millions of interactions to attain human-level performance. Humans, on the other hand, can very quickly exploit highly rewarding nuances of an environment upon first discovery. In the brain, such rapid learning is thought to depend on the hippocampus and its capacity for episodic memory. Here we investigate whether a simple model of hippocampal episodic control can learn to solve difficult sequential decision-making tasks. We demonstrate that it not only attains a highly rewarding strategy significantly faster than state-of-the-"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.04460","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1606.04460","created_at":"2026-05-18T01:12:25.585112+00:00"},{"alias_kind":"arxiv_version","alias_value":"1606.04460v1","created_at":"2026-05-18T01:12:25.585112+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.04460","created_at":"2026-05-18T01:12:25.585112+00:00"},{"alias_kind":"pith_short_12","alias_value":"L7QIIC5KICB6","created_at":"2026-05-18T12:30:29.479603+00:00"},{"alias_kind":"pith_short_16","alias_value":"L7QIIC5KICB6OGZ5","created_at":"2026-05-18T12:30:29.479603+00:00"},{"alias_kind":"pith_short_8","alias_value":"L7QIIC5K","created_at":"2026-05-18T12:30:29.479603+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":9,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"2510.10181","citing_title":"Dejavu: Towards Experience Feedback Learning for Embodied Intelligence","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2309.02427","citing_title":"Cognitive Architectures for Language Agents","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2602.20323","citing_title":"PhysMem: Scaling Test-Time Memory for Embodied Physical Reasoning","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2603.08388","citing_title":"A Hierarchical Error-Corrective Graph Framework for Autonomous Agents with LLM-Based Action Generation","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16331","citing_title":"BrainMem: Brain-Inspired Evolving Memory for Embodied Agent Task Planning","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2605.04651","citing_title":"FAAST: Forward-Only Associative Learning via Closed-Form Fast Weights for Test-Time Supervised Adaptation","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08756","citing_title":"Artifacts as Memory Beyond the Agent Boundary","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04651","citing_title":"FAAST: Forward-Only Associative Learning via Closed-Form Fast Weights for Test-Time Supervised Adaptation","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2604.07108","citing_title":"Information as Structural Alignment: A Dynamical Theory of Continual Learning","ref_index":11,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5","json":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5.json","graph_json":"https://pith.science/api/pith-number/L7QIIC5KICB6OGZ56DXOGVYLB5/graph.json","events_json":"https://pith.science/api/pith-number/L7QIIC5KICB6OGZ56DXOGVYLB5/events.json","paper":"https://pith.science/paper/L7QIIC5K"},"agent_actions":{"view_html":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5","download_json":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5.json","view_paper":"https://pith.science/paper/L7QIIC5K","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1606.04460&json=true","fetch_graph":"https://pith.science/api/pith-number/L7QIIC5KICB6OGZ56DXOGVYLB5/graph.json","fetch_events":"https://pith.science/api/pith-number/L7QIIC5KICB6OGZ56DXOGVYLB5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5/action/storage_attestation","attest_author":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5/action/author_attestation","sign_citation":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5/action/citation_signature","submit_replication":"https://pith.science/pith/L7QIIC5KICB6OGZ56DXOGVYLB5/action/replication_record"}},"created_at":"2026-05-18T01:12:25.585112+00:00","updated_at":"2026-05-18T01:12:25.585112+00:00"}