{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:XDK2R7PAQEOUSMY245PHS5MDCH","short_pith_number":"pith:XDK2R7PA","schema_version":"1.0","canonical_sha256":"b8d5a8fde0811d49331ae75e79758311c5e53cf5328fcdb90918e5717d1a558c","source":{"kind":"arxiv","id":"1808.00177","version":5},"attestation_state":"computed","paper":{"title":"Learning Dexterous In-Hand Manipulation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alex Ray, Arthur Petron, Bob McGrew, Bowen Baker, Glenn Powell, Jakub Pachocki, Jonas Schneider, Josh Tobin, Lilian Weng, Maciek Chociej, Marcin Andrychowicz, Matthias Plappert, OpenAI, Peter Welinder, Rafal Jozefowicz, Szymon Sidor, Wojciech Zaremba","submitted_at":"2018-08-01T06:02:36Z","abstract_excerpt":"We use reinforcement learning (RL) to learn dexterous in-hand manipulation policies which can perform vision-based object reorientation on a physical Shadow Dexterous Hand. The training is performed in a simulated environment in which we randomize many of the physical properties of the system like friction coefficients and an object's appearance. Our policies transfer to the physical robot despite being trained entirely in simulation. Our method does not rely on any human demonstrations, but many behaviors found in human manipulation emerge naturally, including finger gaiting, multi-finger coo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1808.00177","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-01T06:02:36Z","cross_cats_sorted":["cs.AI","cs.RO","stat.ML"],"title_canon_sha256":"0d9b8e195b773ee4cc595c1da4e7dc419b32b0ae31e10ea503f165fc68ebcf2d","abstract_canon_sha256":"da879d95b57fe34d69384d317cd653b1ec5d08d281bc80be166c0c1388005243"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:55.822047Z","signature_b64":"lCMKSqpVRzJxck01/LAS7dX9RW76i4tGQEzh9UMaX7/z5hJHGu0zqgfvG8o4CBMaAICZrvhzHZgK+4uZdy3SBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b8d5a8fde0811d49331ae75e79758311c5e53cf5328fcdb90918e5717d1a558c","last_reissued_at":"2026-05-17T23:55:55.821422Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:55.821422Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning Dexterous In-Hand Manipulation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alex Ray, Arthur Petron, Bob McGrew, Bowen Baker, Glenn Powell, Jakub Pachocki, Jonas Schneider, Josh Tobin, Lilian Weng, Maciek Chociej, Marcin Andrychowicz, Matthias Plappert, OpenAI, Peter Welinder, Rafal Jozefowicz, Szymon Sidor, Wojciech Zaremba","submitted_at":"2018-08-01T06:02:36Z","abstract_excerpt":"We use reinforcement learning (RL) to learn dexterous in-hand manipulation policies which can perform vision-based object reorientation on a physical Shadow Dexterous Hand. The training is performed in a simulated environment in which we randomize many of the physical properties of the system like friction coefficients and an object's appearance. Our policies transfer to the physical robot despite being trained entirely in simulation. Our method does not rely on any human demonstrations, but many behaviors found in human manipulation emerge naturally, including finger gaiting, multi-finger coo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.00177","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1808.00177","created_at":"2026-05-17T23:55:55.821534+00:00"},{"alias_kind":"arxiv_version","alias_value":"1808.00177v5","created_at":"2026-05-17T23:55:55.821534+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.00177","created_at":"2026-05-17T23:55:55.821534+00:00"},{"alias_kind":"pith_short_12","alias_value":"XDK2R7PAQEOU","created_at":"2026-05-18T12:33:01.666342+00:00"},{"alias_kind":"pith_short_16","alias_value":"XDK2R7PAQEOUSMY2","created_at":"2026-05-18T12:33:01.666342+00:00"},{"alias_kind":"pith_short_8","alias_value":"XDK2R7PA","created_at":"2026-05-18T12:33:01.666342+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":14,"internal_anchor_count":11,"sample":[{"citing_arxiv_id":"1906.09868","citing_title":"Pose Estimation for Non-Cooperative Rendezvous Using Neural Networks","ref_index":43,"is_internal_anchor":true},{"citing_arxiv_id":"1906.10124","citing_title":"On Multi-Agent Learning in Team Sports Games","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"1906.11633","citing_title":"ORRB -- OpenAI Remote Rendering Backend","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"1907.01475","citing_title":"Generalizing from a few environments in safety-critical reinforcement learning","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02057","citing_title":"Benchmarking Model-Based Reinforcement Learning","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"1907.04796","citing_title":"Bayesian Optimization in Variational Latent Spaces with Dynamic Compression","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"1907.11388","citing_title":"Learning to Solve a Rubik's Cube with a Dexterous Hand","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2405.14093","citing_title":"A Survey on Vision-Language-Action Models for Embodied AI","ref_index":230,"is_internal_anchor":true},{"citing_arxiv_id":"2412.02818","citing_title":"RoboMD: Uncovering Robot Vulnerabilities through Semantic Potential Fields","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19029","citing_title":"Distributionally Robust Control via Stein Variational Inference for Contact-Rich Manipulation","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"1910.11215","citing_title":"RoboNet: Large-Scale Multi-Robot Learning","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2108.10470","citing_title":"Isaac Gym: High Performance GPU-Based Physics Simulation For Robot Learning","ref_index":5,"is_internal_anchor":false},{"citing_arxiv_id":"2604.25050","citing_title":"DiscreteRTC: Discrete Diffusion Policies are Natural Asynchronous Executors","ref_index":8,"is_internal_anchor":false},{"citing_arxiv_id":"2604.06067","citing_title":"HiPolicy: Hierarchical Multi-Frequency Action Chunking for Policy Learning","ref_index":1,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH","json":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH.json","graph_json":"https://pith.science/api/pith-number/XDK2R7PAQEOUSMY245PHS5MDCH/graph.json","events_json":"https://pith.science/api/pith-number/XDK2R7PAQEOUSMY245PHS5MDCH/events.json","paper":"https://pith.science/paper/XDK2R7PA"},"agent_actions":{"view_html":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH","download_json":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH.json","view_paper":"https://pith.science/paper/XDK2R7PA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1808.00177&json=true","fetch_graph":"https://pith.science/api/pith-number/XDK2R7PAQEOUSMY245PHS5MDCH/graph.json","fetch_events":"https://pith.science/api/pith-number/XDK2R7PAQEOUSMY245PHS5MDCH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH/action/storage_attestation","attest_author":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH/action/author_attestation","sign_citation":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH/action/citation_signature","submit_replication":"https://pith.science/pith/XDK2R7PAQEOUSMY245PHS5MDCH/action/replication_record"}},"created_at":"2026-05-17T23:55:55.821534+00:00","updated_at":"2026-05-17T23:55:55.821534+00:00"}