{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:B44GYKFGHALJFXLGL4LHGGFCQB","short_pith_number":"pith:B44GYKFG","schema_version":"1.0","canonical_sha256":"0f386c28a6381692dd665f167318a2805b422e0d969e2f2ea62a42c1697e5a5d","source":{"kind":"arxiv","id":"1708.05866","version":2},"attestation_state":"computed","paper":{"title":"A Brief Survey of Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Anil Anthony Bharath, Kai Arulkumaran, Marc Peter Deisenroth, Miles Brundage","submitted_at":"2017-08-19T15:55:31Z","abstract_excerpt":"Deep reinforcement learning is poised to revolutionise the field of AI and represents a step towards building autonomous systems with a higher level understanding of the visual world. Currently, deep learning is enabling reinforcement learning to scale to problems that were previously intractable, such as learning to play video games directly from pixels. Deep reinforcement learning algorithms are also applied to robotics, allowing control policies for robots to be learned directly from camera inputs in the real world. In this survey, we begin with an introduction to the general field of reinf"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1708.05866","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-08-19T15:55:31Z","cross_cats_sorted":["cs.AI","cs.CV","stat.ML"],"title_canon_sha256":"b9a05db4ac4a20dd4cfc26334722a5d7bb8a3c5356242c2f762af1c67bace861","abstract_canon_sha256":"2455e2f0cfcfd4721ba4f35d16ff8d99c4520c8787dfa549814b3533d6f1addf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:30:41.805746Z","signature_b64":"XRodEyawqFxtY9kH2/mhJUW70AC//milY7NNALIxPtiy4a6cpvHD7MzM18kGoXsuzV1eIj3IAPRlQkjF8elvDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0f386c28a6381692dd665f167318a2805b422e0d969e2f2ea62a42c1697e5a5d","last_reissued_at":"2026-05-18T00:30:41.805337Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:30:41.805337Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Brief Survey of Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Anil Anthony Bharath, Kai Arulkumaran, Marc Peter Deisenroth, Miles Brundage","submitted_at":"2017-08-19T15:55:31Z","abstract_excerpt":"Deep reinforcement learning is poised to revolutionise the field of AI and represents a step towards building autonomous systems with a higher level understanding of the visual world. Currently, deep learning is enabling reinforcement learning to scale to problems that were previously intractable, such as learning to play video games directly from pixels. Deep reinforcement learning algorithms are also applied to robotics, allowing control policies for robots to be learned directly from camera inputs in the real world. In this survey, we begin with an introduction to the general field of reinf"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.05866","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1708.05866","created_at":"2026-05-18T00:30:41.805400+00:00"},{"alias_kind":"arxiv_version","alias_value":"1708.05866v2","created_at":"2026-05-18T00:30:41.805400+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.05866","created_at":"2026-05-18T00:30:41.805400+00:00"},{"alias_kind":"pith_short_12","alias_value":"B44GYKFGHALJ","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_16","alias_value":"B44GYKFGHALJFXLG","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_8","alias_value":"B44GYKFG","created_at":"2026-05-18T12:31:08.081275+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":9,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"1907.09475","citing_title":"Deep Reinforcement Learning for Clinical Decision Support: A Brief Survey","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"1907.11754","citing_title":"Deep Reinforcement Learning for Personalized Search Story Recommendation","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2405.14093","citing_title":"A Survey on Vision-Language-Action Models for Embodied AI","ref_index":300,"is_internal_anchor":true},{"citing_arxiv_id":"2411.04832","citing_title":"Plasticity Loss in Deep Reinforcement Learning: A Survey","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04539","citing_title":"FlashSAC: Fast and Stable Off-Policy Reinforcement Learning for High-Dimensional Robot Control","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24338","citing_title":"Perfecting Aircraft Maneuvers with Reinforcement Learning","ref_index":12,"is_internal_anchor":false},{"citing_arxiv_id":"2604.24403","citing_title":"An Automatic Ground Collision Avoidance System with Reinforcement Learning","ref_index":11,"is_internal_anchor":false},{"citing_arxiv_id":"2604.24355","citing_title":"An Aircraft Upset Recovery System with Reinforcement Learning","ref_index":18,"is_internal_anchor":false},{"citing_arxiv_id":"2604.04539","citing_title":"FlashSAC: Fast and Stable Off-Policy Reinforcement Learning for High-Dimensional Robot Control","ref_index":3,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB","json":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB.json","graph_json":"https://pith.science/api/pith-number/B44GYKFGHALJFXLGL4LHGGFCQB/graph.json","events_json":"https://pith.science/api/pith-number/B44GYKFGHALJFXLGL4LHGGFCQB/events.json","paper":"https://pith.science/paper/B44GYKFG"},"agent_actions":{"view_html":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB","download_json":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB.json","view_paper":"https://pith.science/paper/B44GYKFG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1708.05866&json=true","fetch_graph":"https://pith.science/api/pith-number/B44GYKFGHALJFXLGL4LHGGFCQB/graph.json","fetch_events":"https://pith.science/api/pith-number/B44GYKFGHALJFXLGL4LHGGFCQB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB/action/storage_attestation","attest_author":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB/action/author_attestation","sign_citation":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB/action/citation_signature","submit_replication":"https://pith.science/pith/B44GYKFGHALJFXLGL4LHGGFCQB/action/replication_record"}},"created_at":"2026-05-18T00:30:41.805400+00:00","updated_at":"2026-05-18T00:30:41.805400+00:00"}