{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:5MHMTJBMQG67COHYGCMT5HLYHK","short_pith_number":"pith:5MHMTJBM","canonical_record":{"source":{"id":"1903.02020","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-05T19:20:35Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"c0edeb7865489f672cdc0b8b574f44ae4ae58ada05fbdb28754fb5e396f101ad","abstract_canon_sha256":"5179ce8d85a0efef9d9911adf5cbdae03b5f531687a43794391f4d6806fac999"},"schema_version":"1.0"},"canonical_sha256":"eb0ec9a42c81bdf138f830993e9d783aa8a4cddb99a0d467f45f7c88ed0938d7","source":{"kind":"arxiv","id":"1903.02020","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.02020","created_at":"2026-05-17T23:44:37Z"},{"alias_kind":"arxiv_version","alias_value":"1903.02020v2","created_at":"2026-05-17T23:44:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.02020","created_at":"2026-05-17T23:44:37Z"},{"alias_kind":"pith_short_12","alias_value":"5MHMTJBMQG67","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"5MHMTJBMQG67COHY","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"5MHMTJBM","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:5MHMTJBMQG67COHYGCMT5HLYHK","target":"record","payload":{"canonical_record":{"source":{"id":"1903.02020","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-05T19:20:35Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"c0edeb7865489f672cdc0b8b574f44ae4ae58ada05fbdb28754fb5e396f101ad","abstract_canon_sha256":"5179ce8d85a0efef9d9911adf5cbdae03b5f531687a43794391f4d6806fac999"},"schema_version":"1.0"},"canonical_sha256":"eb0ec9a42c81bdf138f830993e9d783aa8a4cddb99a0d467f45f7c88ed0938d7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:37.663526Z","signature_b64":"2ZTSZkFzz6S2ao2cjMHb/+OtH/dFed/QFeFpocwXWO+genmGltaGf5jBvBcq4GEQeH9hrHLteBAAqKKWMP9IAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"eb0ec9a42c81bdf138f830993e9d783aa8a4cddb99a0d467f45f7c88ed0938d7","last_reissued_at":"2026-05-17T23:44:37.663096Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:37.663096Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1903.02020","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sQ7OXSUKUdQx1qy6DfrzevxBX4RqDP7HXWN5q7DcuwN5IJFLHskAzstZByiTZ35ovF4MGl14d6gOzsqWopizBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T18:09:37.527036Z"},"content_sha256":"cbd4f29e6825f0aed820f741df7334797766c017b4326a8a8484d1f4c2472010","schema_version":"1.0","event_id":"sha256:cbd4f29e6825f0aed820f741df7334797766c017b4326a8a8484d1f4c2472010"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:5MHMTJBMQG67COHYGCMT5HLYHK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Using Natural Language for Reward Shaping in Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Prasoon Goyal, Raymond J. Mooney, Scott Niekum","submitted_at":"2019-03-05T19:20:35Z","abstract_excerpt":"Recent reinforcement learning (RL) approaches have shown strong performance in complex domains such as Atari games, but are often highly sample inefficient. A common approach to reduce interaction time with the environment is to use reward shaping, which involves carefully designing reward functions that provide the agent intermediate rewards for progress towards the goal. However, designing appropriate shaping rewards is known to be difficult as well as time-consuming. In this work, we address this problem by using natural language instructions to perform reward shaping. We propose the Langua"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.02020","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JRgaNzDRotfoW5dtWNt2v5DAIT3R+3hayCGPhIhwUcf9XYDPTPFHgvrlSD41o4t8/fOl/AWhANdvURmXG6UbDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T18:09:37.527398Z"},"content_sha256":"b820baab423feb76ce0976c51466dd544c9fc111d23eb61c71a729f580050257","schema_version":"1.0","event_id":"sha256:b820baab423feb76ce0976c51466dd544c9fc111d23eb61c71a729f580050257"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5MHMTJBMQG67COHYGCMT5HLYHK/bundle.json","state_url":"https://pith.science/pith/5MHMTJBMQG67COHYGCMT5HLYHK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5MHMTJBMQG67COHYGCMT5HLYHK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T18:09:37Z","links":{"resolver":"https://pith.science/pith/5MHMTJBMQG67COHYGCMT5HLYHK","bundle":"https://pith.science/pith/5MHMTJBMQG67COHYGCMT5HLYHK/bundle.json","state":"https://pith.science/pith/5MHMTJBMQG67COHYGCMT5HLYHK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5MHMTJBMQG67COHYGCMT5HLYHK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:5MHMTJBMQG67COHYGCMT5HLYHK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5179ce8d85a0efef9d9911adf5cbdae03b5f531687a43794391f4d6806fac999","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-05T19:20:35Z","title_canon_sha256":"c0edeb7865489f672cdc0b8b574f44ae4ae58ada05fbdb28754fb5e396f101ad"},"schema_version":"1.0","source":{"id":"1903.02020","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.02020","created_at":"2026-05-17T23:44:37Z"},{"alias_kind":"arxiv_version","alias_value":"1903.02020v2","created_at":"2026-05-17T23:44:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.02020","created_at":"2026-05-17T23:44:37Z"},{"alias_kind":"pith_short_12","alias_value":"5MHMTJBMQG67","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"5MHMTJBMQG67COHY","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"5MHMTJBM","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:b820baab423feb76ce0976c51466dd544c9fc111d23eb61c71a729f580050257","target":"graph","created_at":"2026-05-17T23:44:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Recent reinforcement learning (RL) approaches have shown strong performance in complex domains such as Atari games, but are often highly sample inefficient. A common approach to reduce interaction time with the environment is to use reward shaping, which involves carefully designing reward functions that provide the agent intermediate rewards for progress towards the goal. However, designing appropriate shaping rewards is known to be difficult as well as time-consuming. In this work, we address this problem by using natural language instructions to perform reward shaping. We propose the Langua","authors_text":"Prasoon Goyal, Raymond J. Mooney, Scott Niekum","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-05T19:20:35Z","title":"Using Natural Language for Reward Shaping in Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.02020","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cbd4f29e6825f0aed820f741df7334797766c017b4326a8a8484d1f4c2472010","target":"record","created_at":"2026-05-17T23:44:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5179ce8d85a0efef9d9911adf5cbdae03b5f531687a43794391f4d6806fac999","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-05T19:20:35Z","title_canon_sha256":"c0edeb7865489f672cdc0b8b574f44ae4ae58ada05fbdb28754fb5e396f101ad"},"schema_version":"1.0","source":{"id":"1903.02020","kind":"arxiv","version":2}},"canonical_sha256":"eb0ec9a42c81bdf138f830993e9d783aa8a4cddb99a0d467f45f7c88ed0938d7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"eb0ec9a42c81bdf138f830993e9d783aa8a4cddb99a0d467f45f7c88ed0938d7","first_computed_at":"2026-05-17T23:44:37.663096Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:37.663096Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2ZTSZkFzz6S2ao2cjMHb/+OtH/dFed/QFeFpocwXWO+genmGltaGf5jBvBcq4GEQeH9hrHLteBAAqKKWMP9IAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:37.663526Z","signed_message":"canonical_sha256_bytes"},"source_id":"1903.02020","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cbd4f29e6825f0aed820f741df7334797766c017b4326a8a8484d1f4c2472010","sha256:b820baab423feb76ce0976c51466dd544c9fc111d23eb61c71a729f580050257"],"state_sha256":"eb44c81bf7651beeba3094b468c9b36ff0d5f3412e6c39b66c8bdd0ca564d883"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3u8Yi46u7WB/+Kc951FfpdtnkdKYyL/oE4PPleFinH7urXOEz3Tl1ncYDsmlvkjRaJEFwbKVpAXMHj0bZSNfBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T18:09:37.529727Z","bundle_sha256":"7b050a64de076c755640ea92142cf8a9804e34d6a4bb33c449eb6e89b1f660db"}}