{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:T4S57HPG4HI6MDL7LNXUO3XWLX","short_pith_number":"pith:T4S57HPG","canonical_record":{"source":{"id":"1604.03986","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-13T22:13:52Z","cross_cats_sorted":[],"title_canon_sha256":"c8aa6739760080bd7e29adf8ed41aa4a29497d7a93e8da46232c3667edaf2ab3","abstract_canon_sha256":"c9874af6967310f08f48d9b4693d3dbc5ff793a47c0645c238ea56176c46368c"},"schema_version":"1.0"},"canonical_sha256":"9f25df9de6e1d1e60d7f5b6f476ef65de0fd943d34964e490d661accf23012c0","source":{"kind":"arxiv","id":"1604.03986","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1604.03986","created_at":"2026-05-18T01:17:07Z"},{"alias_kind":"arxiv_version","alias_value":"1604.03986v1","created_at":"2026-05-18T01:17:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1604.03986","created_at":"2026-05-18T01:17:07Z"},{"alias_kind":"pith_short_12","alias_value":"T4S57HPG4HI6","created_at":"2026-05-18T12:30:44Z"},{"alias_kind":"pith_short_16","alias_value":"T4S57HPG4HI6MDL7","created_at":"2026-05-18T12:30:44Z"},{"alias_kind":"pith_short_8","alias_value":"T4S57HPG","created_at":"2026-05-18T12:30:44Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:T4S57HPG4HI6MDL7LNXUO3XWLX","target":"record","payload":{"canonical_record":{"source":{"id":"1604.03986","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-13T22:13:52Z","cross_cats_sorted":[],"title_canon_sha256":"c8aa6739760080bd7e29adf8ed41aa4a29497d7a93e8da46232c3667edaf2ab3","abstract_canon_sha256":"c9874af6967310f08f48d9b4693d3dbc5ff793a47c0645c238ea56176c46368c"},"schema_version":"1.0"},"canonical_sha256":"9f25df9de6e1d1e60d7f5b6f476ef65de0fd943d34964e490d661accf23012c0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:17:07.015707Z","signature_b64":"/ZJFJwpUHH6VYipDLM6r3XFnaLzg8Wqy4Em2+i88Sz/CAOgEZrW7iTxvSjq21xca5WfIQs1pk8ybBzObVLDVCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9f25df9de6e1d1e60d7f5b6f476ef65de0fd943d34964e490d661accf23012c0","last_reissued_at":"2026-05-18T01:17:07.014962Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:17:07.014962Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1604.03986","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:17:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xGahSEVXNEmQGTtyIMXvohtf3X9GVARyTXcGP0FIY4AXuth++Ecw/TwFO8LyISRxWVhP15wRkupxLi0GOD0bBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-25T13:02:55.307871Z"},"content_sha256":"d79ea94c3952e9bcc2f2312dbc589c37ae5b5422a0937cdb46a9481833ffb654","schema_version":"1.0","event_id":"sha256:d79ea94c3952e9bcc2f2312dbc589c37ae5b5422a0937cdb46a9481833ffb654"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:T4S57HPG4HI6MDL7LNXUO3XWLX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Theoretically-Grounded Policy Advice from Multiple Teachers in Reinforcement Learning Settings with Applications to Negative Transfer","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Haitham Bou Ammar, Matthew E. Taylor, Yusen Zhan","submitted_at":"2016-04-13T22:13:52Z","abstract_excerpt":"Policy advice is a transfer learning method where a student agent is able to learn faster via advice from a teacher. However, both this and other reinforcement learning transfer methods have little theoretical analysis. This paper formally defines a setting where multiple teacher agents can provide advice to a student and introduces an algorithm to leverage both autonomous exploration and teacher's advice. Our regret bounds justify the intuition that good teachers help while bad teachers hurt. Using our formalization, we are also able to quantify, for the first time, when negative transfer can"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1604.03986","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:17:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"syGaUTPnZ6a9q8dYYzevec487RNJ6IqptonM8nk689CCyrYduaFrg2WiulJqQkbj5cmkCSK4PSjU4/4Xrx20BQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-25T13:02:55.308217Z"},"content_sha256":"f5be6ec8625e6839975a312c3a903f234524c59a1e5a025aaadbfbd990506790","schema_version":"1.0","event_id":"sha256:f5be6ec8625e6839975a312c3a903f234524c59a1e5a025aaadbfbd990506790"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/T4S57HPG4HI6MDL7LNXUO3XWLX/bundle.json","state_url":"https://pith.science/pith/T4S57HPG4HI6MDL7LNXUO3XWLX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/T4S57HPG4HI6MDL7LNXUO3XWLX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-25T13:02:55Z","links":{"resolver":"https://pith.science/pith/T4S57HPG4HI6MDL7LNXUO3XWLX","bundle":"https://pith.science/pith/T4S57HPG4HI6MDL7LNXUO3XWLX/bundle.json","state":"https://pith.science/pith/T4S57HPG4HI6MDL7LNXUO3XWLX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/T4S57HPG4HI6MDL7LNXUO3XWLX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:T4S57HPG4HI6MDL7LNXUO3XWLX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c9874af6967310f08f48d9b4693d3dbc5ff793a47c0645c238ea56176c46368c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-13T22:13:52Z","title_canon_sha256":"c8aa6739760080bd7e29adf8ed41aa4a29497d7a93e8da46232c3667edaf2ab3"},"schema_version":"1.0","source":{"id":"1604.03986","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1604.03986","created_at":"2026-05-18T01:17:07Z"},{"alias_kind":"arxiv_version","alias_value":"1604.03986v1","created_at":"2026-05-18T01:17:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1604.03986","created_at":"2026-05-18T01:17:07Z"},{"alias_kind":"pith_short_12","alias_value":"T4S57HPG4HI6","created_at":"2026-05-18T12:30:44Z"},{"alias_kind":"pith_short_16","alias_value":"T4S57HPG4HI6MDL7","created_at":"2026-05-18T12:30:44Z"},{"alias_kind":"pith_short_8","alias_value":"T4S57HPG","created_at":"2026-05-18T12:30:44Z"}],"graph_snapshots":[{"event_id":"sha256:f5be6ec8625e6839975a312c3a903f234524c59a1e5a025aaadbfbd990506790","target":"graph","created_at":"2026-05-18T01:17:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Policy advice is a transfer learning method where a student agent is able to learn faster via advice from a teacher. However, both this and other reinforcement learning transfer methods have little theoretical analysis. This paper formally defines a setting where multiple teacher agents can provide advice to a student and introduces an algorithm to leverage both autonomous exploration and teacher's advice. Our regret bounds justify the intuition that good teachers help while bad teachers hurt. Using our formalization, we are also able to quantify, for the first time, when negative transfer can","authors_text":"Haitham Bou Ammar, Matthew E. Taylor, Yusen Zhan","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-13T22:13:52Z","title":"Theoretically-Grounded Policy Advice from Multiple Teachers in Reinforcement Learning Settings with Applications to Negative Transfer"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1604.03986","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d79ea94c3952e9bcc2f2312dbc589c37ae5b5422a0937cdb46a9481833ffb654","target":"record","created_at":"2026-05-18T01:17:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c9874af6967310f08f48d9b4693d3dbc5ff793a47c0645c238ea56176c46368c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-04-13T22:13:52Z","title_canon_sha256":"c8aa6739760080bd7e29adf8ed41aa4a29497d7a93e8da46232c3667edaf2ab3"},"schema_version":"1.0","source":{"id":"1604.03986","kind":"arxiv","version":1}},"canonical_sha256":"9f25df9de6e1d1e60d7f5b6f476ef65de0fd943d34964e490d661accf23012c0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9f25df9de6e1d1e60d7f5b6f476ef65de0fd943d34964e490d661accf23012c0","first_computed_at":"2026-05-18T01:17:07.014962Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:17:07.014962Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/ZJFJwpUHH6VYipDLM6r3XFnaLzg8Wqy4Em2+i88Sz/CAOgEZrW7iTxvSjq21xca5WfIQs1pk8ybBzObVLDVCg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:17:07.015707Z","signed_message":"canonical_sha256_bytes"},"source_id":"1604.03986","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d79ea94c3952e9bcc2f2312dbc589c37ae5b5422a0937cdb46a9481833ffb654","sha256:f5be6ec8625e6839975a312c3a903f234524c59a1e5a025aaadbfbd990506790"],"state_sha256":"1feb72b2ef3077a2e3a3867640ad9cd3a36e6b6d2e968b71d6a2cdeb784ddaf8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8Vqjnsw3ck1uIMnz9pgd5OTU4RT6P2ursXDRprWtPsVxwxhg3ETaVq7mrPXogAIvKrPddCLXbZEFQhBnaKS3Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-25T13:02:55.310167Z","bundle_sha256":"c55378bfa09c204ba71806f795832a9326df5beb9d57dee914e045b776d5de66"}}