{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:TDUX4B64KK7WFNIVIVFV3F4WXA","short_pith_number":"pith:TDUX4B64","schema_version":"1.0","canonical_sha256":"98e97e07dc52bf62b515454b5d9796b8320f9f3b0509e4a0a0328d390c9b7e93","source":{"kind":"arxiv","id":"1511.06342","version":4},"attestation_state":"computed","paper":{"title":"Actor-Mimic: Deep Multitask and Transfer Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Emilio Parisotto, Jimmy Lei Ba, Ruslan Salakhutdinov","submitted_at":"2015-11-19T20:17:27Z","abstract_excerpt":"The ability to act in multiple environments and transfer previous knowledge to new situations can be considered a critical aspect of any intelligent agent. Towards this goal, we define a novel method of multitask and transfer learning that enables an autonomous agent to learn how to behave in multiple tasks simultaneously, and then generalize its knowledge to new domains. This method, termed \"Actor-Mimic\", exploits the use of deep reinforcement learning and model compression techniques to train a single policy network that learns how to act in a set of distinct tasks by using the guidance of s"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1511.06342","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-11-19T20:17:27Z","cross_cats_sorted":[],"title_canon_sha256":"19f5006f94e78c5954ff9c36083c3a813836158a428f72f269e8e14cbbabb4f1","abstract_canon_sha256":"b9b1fe5e573e578eda9c68655809be9ee45237ddbf63b211cee78bf6aea71a92"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:20:16.457823Z","signature_b64":"4oR7dDpAzveM1+79iARcxbd6h6RDUNWFfucELgxlBzPpIBe0JBDktBgmU8IkJL7n6xareNZhKC8bx3NvgxoLCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"98e97e07dc52bf62b515454b5d9796b8320f9f3b0509e4a0a0328d390c9b7e93","last_reissued_at":"2026-05-18T01:20:16.457254Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:20:16.457254Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Actor-Mimic: Deep Multitask and Transfer Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Emilio Parisotto, Jimmy Lei Ba, Ruslan Salakhutdinov","submitted_at":"2015-11-19T20:17:27Z","abstract_excerpt":"The ability to act in multiple environments and transfer previous knowledge to new situations can be considered a critical aspect of any intelligent agent. Towards this goal, we define a novel method of multitask and transfer learning that enables an autonomous agent to learn how to behave in multiple tasks simultaneously, and then generalize its knowledge to new domains. This method, termed \"Actor-Mimic\", exploits the use of deep reinforcement learning and model compression techniques to train a single policy network that learns how to act in a set of distinct tasks by using the guidance of s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1511.06342","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1511.06342","created_at":"2026-05-18T01:20:16.457325+00:00"},{"alias_kind":"arxiv_version","alias_value":"1511.06342v4","created_at":"2026-05-18T01:20:16.457325+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1511.06342","created_at":"2026-05-18T01:20:16.457325+00:00"},{"alias_kind":"pith_short_12","alias_value":"TDUX4B64KK7W","created_at":"2026-05-18T12:29:42.218222+00:00"},{"alias_kind":"pith_short_16","alias_value":"TDUX4B64KK7WFNIV","created_at":"2026-05-18T12:29:42.218222+00:00"},{"alias_kind":"pith_short_8","alias_value":"TDUX4B64","created_at":"2026-05-18T12:29:42.218222+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"1907.02874","citing_title":"Attentive Multi-Task Deep Reinforcement Learning","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17486","citing_title":"DyGRO-VLA: Cross-Task Scaling of Vision-Language-Action Models via Dynamic Grouped Residual Optimization","ref_index":113,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13058","citing_title":"MUJICA: Multi-skill Unified Joint Integration of Control Architecture for Wheeled-Legged Robots","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05478","citing_title":"LANTERN: LLM-Augmented Neurosymbolic Transfer with Experience-Gated Reasoning Networks","ref_index":28,"is_internal_anchor":false},{"citing_arxiv_id":"2309.07864","citing_title":"The Rise and Potential of Large Language Model Based Agents: A Survey","ref_index":79,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01862","citing_title":"QHyer: Q-conditioned Hybrid Attention-mamba Transformer for Offline Goal-conditioned RL","ref_index":144,"is_internal_anchor":false},{"citing_arxiv_id":"2604.17800","citing_title":"ReFineVLA: Multimodal Reasoning-Aware Generalist Robotic Policies via Teacher-Guided Fine-Tuning","ref_index":28,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA","json":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA.json","graph_json":"https://pith.science/api/pith-number/TDUX4B64KK7WFNIVIVFV3F4WXA/graph.json","events_json":"https://pith.science/api/pith-number/TDUX4B64KK7WFNIVIVFV3F4WXA/events.json","paper":"https://pith.science/paper/TDUX4B64"},"agent_actions":{"view_html":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA","download_json":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA.json","view_paper":"https://pith.science/paper/TDUX4B64","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1511.06342&json=true","fetch_graph":"https://pith.science/api/pith-number/TDUX4B64KK7WFNIVIVFV3F4WXA/graph.json","fetch_events":"https://pith.science/api/pith-number/TDUX4B64KK7WFNIVIVFV3F4WXA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA/action/storage_attestation","attest_author":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA/action/author_attestation","sign_citation":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA/action/citation_signature","submit_replication":"https://pith.science/pith/TDUX4B64KK7WFNIVIVFV3F4WXA/action/replication_record"}},"created_at":"2026-05-18T01:20:16.457325+00:00","updated_at":"2026-05-18T01:20:16.457325+00:00"}