{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2021:ML7WDL3UOYPHVX3STGDZSVQKLA","short_pith_number":"pith:ML7WDL3U","canonical_record":{"source":{"id":"2105.06350","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2021-05-13T15:07:23Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"b5994341af4c02820d979b2761bb1bf0f84416511fb46fbced06bd0b7d95e408","abstract_canon_sha256":"4fd037f71c54437199db8dbca3f89d8c7000d763684466c4e7b5f4d38a313437"},"schema_version":"1.0"},"canonical_sha256":"62ff61af74761e7adf72998799560a5807d1e8b999415e67dc9daa8b471d9aa6","source":{"kind":"arxiv","id":"2105.06350","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2105.06350","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"arxiv_version","alias_value":"2105.06350v1","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2105.06350","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"pith_short_12","alias_value":"ML7WDL3UOYPH","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"pith_short_16","alias_value":"ML7WDL3UOYPHVX3S","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"pith_short_8","alias_value":"ML7WDL3U","created_at":"2026-07-05T02:40:06Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2021:ML7WDL3UOYPHVX3STGDZSVQKLA","target":"record","payload":{"canonical_record":{"source":{"id":"2105.06350","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2021-05-13T15:07:23Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"b5994341af4c02820d979b2761bb1bf0f84416511fb46fbced06bd0b7d95e408","abstract_canon_sha256":"4fd037f71c54437199db8dbca3f89d8c7000d763684466c4e7b5f4d38a313437"},"schema_version":"1.0"},"canonical_sha256":"62ff61af74761e7adf72998799560a5807d1e8b999415e67dc9daa8b471d9aa6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T02:40:06.665331Z","signature_b64":"/9dtXcCBI6OpSmfJbfYYGZ8rYa4VIGgu7y/n/AoM/2pWfd0OsWbIahcx0Rs2fiSHXN0bKKnDvpnXEsKLbFGICg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"62ff61af74761e7adf72998799560a5807d1e8b999415e67dc9daa8b471d9aa6","last_reissued_at":"2026-07-05T02:40:06.664869Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T02:40:06.664869Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2105.06350","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T02:40:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oBD6vDDshwZy5MKm+dfiApeHMEVsqYNZh3NCioACdxX5jmjsjNjygYlpuEytZjN3jK9EWZk08MsoKNRltN3jDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T09:17:49.618677Z"},"content_sha256":"e2ab8f7963ec70c7e4267f1c8f5bb14eb5934540c9350cd2f50c1ad60f05623b","schema_version":"1.0","event_id":"sha256:e2ab8f7963ec70c7e4267f1c8f5bb14eb5934540c9350cd2f50c1ad60f05623b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2021:ML7WDL3UOYPHVX3STGDZSVQKLA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MapGo: Model-Assisted Policy Optimization for Goal-Oriented Tasks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Deheng Ye, Jian Shen, Menghui Zhu, Minghuan Liu, Qiang Fu, Sheng Chen, Weinan Zhang, Wei Yang, Yong Yu, Zhicheng Zhang","submitted_at":"2021-05-13T15:07:23Z","abstract_excerpt":"In Goal-oriented Reinforcement learning, relabeling the raw goals in past experience to provide agents with hindsight ability is a major solution to the reward sparsity problem. In this paper, to enhance the diversity of relabeled goals, we develop FGI (Foresight Goal Inference), a new relabeling strategy that relabels the goals by looking into the future with a learned dynamics model. Besides, to improve sample efficiency, we propose to use the dynamics model to generate simulated trajectories for policy training. By integrating these two improvements, we introduce the MapGo framework (Model-"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2105.06350","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2105.06350/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T02:40:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"P1p1QtiKs3OCLoyNIo1NrH9t+0b0uoAeoM0xlVGP9BWQjxwV7wRfuM5soR3oMi5E3325fXVo8lE+Db5wECNeCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T09:17:49.619064Z"},"content_sha256":"4c837d44e883af67a637ae8af437a5ea19ec1f8e0fe4ed45e27eb6ee65a0577f","schema_version":"1.0","event_id":"sha256:4c837d44e883af67a637ae8af437a5ea19ec1f8e0fe4ed45e27eb6ee65a0577f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ML7WDL3UOYPHVX3STGDZSVQKLA/bundle.json","state_url":"https://pith.science/pith/ML7WDL3UOYPHVX3STGDZSVQKLA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ML7WDL3UOYPHVX3STGDZSVQKLA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T09:17:49Z","links":{"resolver":"https://pith.science/pith/ML7WDL3UOYPHVX3STGDZSVQKLA","bundle":"https://pith.science/pith/ML7WDL3UOYPHVX3STGDZSVQKLA/bundle.json","state":"https://pith.science/pith/ML7WDL3UOYPHVX3STGDZSVQKLA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ML7WDL3UOYPHVX3STGDZSVQKLA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2021:ML7WDL3UOYPHVX3STGDZSVQKLA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4fd037f71c54437199db8dbca3f89d8c7000d763684466c4e7b5f4d38a313437","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2021-05-13T15:07:23Z","title_canon_sha256":"b5994341af4c02820d979b2761bb1bf0f84416511fb46fbced06bd0b7d95e408"},"schema_version":"1.0","source":{"id":"2105.06350","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2105.06350","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"arxiv_version","alias_value":"2105.06350v1","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2105.06350","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"pith_short_12","alias_value":"ML7WDL3UOYPH","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"pith_short_16","alias_value":"ML7WDL3UOYPHVX3S","created_at":"2026-07-05T02:40:06Z"},{"alias_kind":"pith_short_8","alias_value":"ML7WDL3U","created_at":"2026-07-05T02:40:06Z"}],"graph_snapshots":[{"event_id":"sha256:4c837d44e883af67a637ae8af437a5ea19ec1f8e0fe4ed45e27eb6ee65a0577f","target":"graph","created_at":"2026-07-05T02:40:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2105.06350/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"In Goal-oriented Reinforcement learning, relabeling the raw goals in past experience to provide agents with hindsight ability is a major solution to the reward sparsity problem. In this paper, to enhance the diversity of relabeled goals, we develop FGI (Foresight Goal Inference), a new relabeling strategy that relabels the goals by looking into the future with a learned dynamics model. Besides, to improve sample efficiency, we propose to use the dynamics model to generate simulated trajectories for policy training. By integrating these two improvements, we introduce the MapGo framework (Model-","authors_text":"Deheng Ye, Jian Shen, Menghui Zhu, Minghuan Liu, Qiang Fu, Sheng Chen, Weinan Zhang, Wei Yang, Yong Yu, Zhicheng Zhang","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2021-05-13T15:07:23Z","title":"MapGo: Model-Assisted Policy Optimization for Goal-Oriented Tasks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2105.06350","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e2ab8f7963ec70c7e4267f1c8f5bb14eb5934540c9350cd2f50c1ad60f05623b","target":"record","created_at":"2026-07-05T02:40:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4fd037f71c54437199db8dbca3f89d8c7000d763684466c4e7b5f4d38a313437","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2021-05-13T15:07:23Z","title_canon_sha256":"b5994341af4c02820d979b2761bb1bf0f84416511fb46fbced06bd0b7d95e408"},"schema_version":"1.0","source":{"id":"2105.06350","kind":"arxiv","version":1}},"canonical_sha256":"62ff61af74761e7adf72998799560a5807d1e8b999415e67dc9daa8b471d9aa6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"62ff61af74761e7adf72998799560a5807d1e8b999415e67dc9daa8b471d9aa6","first_computed_at":"2026-07-05T02:40:06.664869Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T02:40:06.664869Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/9dtXcCBI6OpSmfJbfYYGZ8rYa4VIGgu7y/n/AoM/2pWfd0OsWbIahcx0Rs2fiSHXN0bKKnDvpnXEsKLbFGICg==","signature_status":"signed_v1","signed_at":"2026-07-05T02:40:06.665331Z","signed_message":"canonical_sha256_bytes"},"source_id":"2105.06350","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e2ab8f7963ec70c7e4267f1c8f5bb14eb5934540c9350cd2f50c1ad60f05623b","sha256:4c837d44e883af67a637ae8af437a5ea19ec1f8e0fe4ed45e27eb6ee65a0577f"],"state_sha256":"c2283edbdb948913fb580d81ae79bb6bd0d5829601727662543e7f1681ff5c7d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yLB+rPJccWJEyR2kLGip+kwzQYli7Lzs1UNu9HK2csi18ZHQTjtn5AZZEguRekhHKGCaYEsUzskdM1XcMNU0DQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T09:17:49.621294Z","bundle_sha256":"0242d7bfe0113c94ebeff1d5ad2ffe59228c325c93f965f640b094522e011138"}}