{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2020:33HV3LACXRMNDB7IUQ2BVSNWY3","short_pith_number":"pith:33HV3LAC","canonical_record":{"source":{"id":"2011.01297","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-11-02T20:29:09Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"e2270c2ac4436c03cc3f5e586e8140af5ab2119fc2c146be05587fb0f83ded04","abstract_canon_sha256":"ef2e5744a7e42d7c0dc3c768213ff90633238d06a9e3b292a56d6bca62bfb196"},"schema_version":"1.0"},"canonical_sha256":"decf5dac02bc58d187e8a4341ac9b6c6e2674b4995c23ed22b43e3aa297db373","source":{"kind":"arxiv","id":"2011.01297","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2011.01297","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"arxiv_version","alias_value":"2011.01297v1","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2011.01297","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"pith_short_12","alias_value":"33HV3LACXRMN","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"pith_short_16","alias_value":"33HV3LACXRMNDB7I","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"pith_short_8","alias_value":"33HV3LAC","created_at":"2026-07-05T01:48:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2020:33HV3LACXRMNDB7IUQ2BVSNWY3","target":"record","payload":{"canonical_record":{"source":{"id":"2011.01297","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-11-02T20:29:09Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"e2270c2ac4436c03cc3f5e586e8140af5ab2119fc2c146be05587fb0f83ded04","abstract_canon_sha256":"ef2e5744a7e42d7c0dc3c768213ff90633238d06a9e3b292a56d6bca62bfb196"},"schema_version":"1.0"},"canonical_sha256":"decf5dac02bc58d187e8a4341ac9b6c6e2674b4995c23ed22b43e3aa297db373","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T01:48:31.083774Z","signature_b64":"SXJ2z20bC37C7Fx6UWg/H0fiGROenwP5n5ORP/U2joxDTwfczPIuBp2+8NJj1c6xfig3ytXmyOMIS6kdfwFjAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"decf5dac02bc58d187e8a4341ac9b6c6e2674b4995c23ed22b43e3aa297db373","last_reissued_at":"2026-07-05T01:48:31.083358Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T01:48:31.083358Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2011.01297","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T01:48:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"n6uuxMCP0Fb9XlDfZ3uTDCv8P/in8O/ymbRYxlJ4H26jz0CzydTNyPTkDcCDo6dfz/XfzhJdp8MnqzCNUsNgCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T08:05:16.159203Z"},"content_sha256":"bb77f9b7e11db536a5c492050e4bed238585cde418f03e78c2fbeb0e42cb6a1c","schema_version":"1.0","event_id":"sha256:bb77f9b7e11db536a5c492050e4bed238585cde418f03e78c2fbeb0e42cb6a1c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2020:33HV3LACXRMNDB7IUQ2BVSNWY3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Useful Policy Invariant Shaping from Arbitrary Advice","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Anna Harutyunyan, Matthew E. Taylor, Michael Bowling, Paniz Behboudian, Yash Satsangi","submitted_at":"2020-11-02T20:29:09Z","abstract_excerpt":"Reinforcement learning is a powerful learning paradigm in which agents can learn to maximize sparse and delayed reward signals. Although RL has had many impressive successes in complex domains, learning can take hours, days, or even years of training data. A major challenge of contemporary RL research is to discover how to learn with less data. Previous work has shown that domain information can be successfully used to shape the reward; by adding additional reward information, the agent can learn with much less data. Furthermore, if the reward is constructed from a potential function, the opti"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2011.01297","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2011.01297/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T01:48:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Tjk2CYBINRestrOCKcr2SLgQaE2xahdzRfuK8B4hRc7VEH0Y3Ze3fBM9NQcXZ1Cteb0+o3MIKYBIlUJIu7VlDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T08:05:16.159588Z"},"content_sha256":"4c3a8fc28fd6ed69f1b1683b536af27128ebeb4a1887b3808126c1baebd080d4","schema_version":"1.0","event_id":"sha256:4c3a8fc28fd6ed69f1b1683b536af27128ebeb4a1887b3808126c1baebd080d4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/33HV3LACXRMNDB7IUQ2BVSNWY3/bundle.json","state_url":"https://pith.science/pith/33HV3LACXRMNDB7IUQ2BVSNWY3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/33HV3LACXRMNDB7IUQ2BVSNWY3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T08:05:16Z","links":{"resolver":"https://pith.science/pith/33HV3LACXRMNDB7IUQ2BVSNWY3","bundle":"https://pith.science/pith/33HV3LACXRMNDB7IUQ2BVSNWY3/bundle.json","state":"https://pith.science/pith/33HV3LACXRMNDB7IUQ2BVSNWY3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/33HV3LACXRMNDB7IUQ2BVSNWY3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:33HV3LACXRMNDB7IUQ2BVSNWY3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ef2e5744a7e42d7c0dc3c768213ff90633238d06a9e3b292a56d6bca62bfb196","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-11-02T20:29:09Z","title_canon_sha256":"e2270c2ac4436c03cc3f5e586e8140af5ab2119fc2c146be05587fb0f83ded04"},"schema_version":"1.0","source":{"id":"2011.01297","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2011.01297","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"arxiv_version","alias_value":"2011.01297v1","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2011.01297","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"pith_short_12","alias_value":"33HV3LACXRMN","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"pith_short_16","alias_value":"33HV3LACXRMNDB7I","created_at":"2026-07-05T01:48:31Z"},{"alias_kind":"pith_short_8","alias_value":"33HV3LAC","created_at":"2026-07-05T01:48:31Z"}],"graph_snapshots":[{"event_id":"sha256:4c3a8fc28fd6ed69f1b1683b536af27128ebeb4a1887b3808126c1baebd080d4","target":"graph","created_at":"2026-07-05T01:48:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2011.01297/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning is a powerful learning paradigm in which agents can learn to maximize sparse and delayed reward signals. Although RL has had many impressive successes in complex domains, learning can take hours, days, or even years of training data. A major challenge of contemporary RL research is to discover how to learn with less data. Previous work has shown that domain information can be successfully used to shape the reward; by adding additional reward information, the agent can learn with much less data. Furthermore, if the reward is constructed from a potential function, the opti","authors_text":"Anna Harutyunyan, Matthew E. Taylor, Michael Bowling, Paniz Behboudian, Yash Satsangi","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-11-02T20:29:09Z","title":"Useful Policy Invariant Shaping from Arbitrary Advice"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2011.01297","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bb77f9b7e11db536a5c492050e4bed238585cde418f03e78c2fbeb0e42cb6a1c","target":"record","created_at":"2026-07-05T01:48:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ef2e5744a7e42d7c0dc3c768213ff90633238d06a9e3b292a56d6bca62bfb196","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-11-02T20:29:09Z","title_canon_sha256":"e2270c2ac4436c03cc3f5e586e8140af5ab2119fc2c146be05587fb0f83ded04"},"schema_version":"1.0","source":{"id":"2011.01297","kind":"arxiv","version":1}},"canonical_sha256":"decf5dac02bc58d187e8a4341ac9b6c6e2674b4995c23ed22b43e3aa297db373","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"decf5dac02bc58d187e8a4341ac9b6c6e2674b4995c23ed22b43e3aa297db373","first_computed_at":"2026-07-05T01:48:31.083358Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T01:48:31.083358Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"SXJ2z20bC37C7Fx6UWg/H0fiGROenwP5n5ORP/U2joxDTwfczPIuBp2+8NJj1c6xfig3ytXmyOMIS6kdfwFjAQ==","signature_status":"signed_v1","signed_at":"2026-07-05T01:48:31.083774Z","signed_message":"canonical_sha256_bytes"},"source_id":"2011.01297","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bb77f9b7e11db536a5c492050e4bed238585cde418f03e78c2fbeb0e42cb6a1c","sha256:4c3a8fc28fd6ed69f1b1683b536af27128ebeb4a1887b3808126c1baebd080d4"],"state_sha256":"55d2b78a312e09253ddbf0d2686f6de1b523cc38029ab122914dd8b23f06d4f9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xd7/L26EGZIslhiFZPbWDEDVVbBgD8c6EZqTvpISwbk3kBXF8F0NFu5WqDF6NyoV7Qoesf7SeS3Wtt5OBl3pBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T08:05:16.161616Z","bundle_sha256":"9c5aa6eb0352bad56a5682c277ef57a477c71875f6f3a85c087811cdb5268507"}}