{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:HGLUFG2XMQIB6V7EU55QRAE2UH","short_pith_number":"pith:HGLUFG2X","canonical_record":{"source":{"id":"1807.02322","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-07-06T09:15:05Z","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"title_canon_sha256":"f04db341a290a040eb7d581083cf044718f39c40448f1c39a7502165e3947937","abstract_canon_sha256":"92615ab3a4b27a65daacfbab214ffccdc06cd5d7252f4eafcdcc71beeb7d7e89"},"schema_version":"1.0"},"canonical_sha256":"3997429b5764101f57e4a77b08809aa1e5fc496fcfba8f611c2cbd2b49029074","source":{"kind":"arxiv","id":"1807.02322","version":5},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.02322","created_at":"2026-05-17T23:56:29Z"},{"alias_kind":"arxiv_version","alias_value":"1807.02322v5","created_at":"2026-05-17T23:56:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.02322","created_at":"2026-05-17T23:56:29Z"},{"alias_kind":"pith_short_12","alias_value":"HGLUFG2XMQIB","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"HGLUFG2XMQIB6V7E","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"HGLUFG2X","created_at":"2026-05-18T12:32:28Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:HGLUFG2XMQIB6V7EU55QRAE2UH","target":"record","payload":{"canonical_record":{"source":{"id":"1807.02322","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-07-06T09:15:05Z","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"title_canon_sha256":"f04db341a290a040eb7d581083cf044718f39c40448f1c39a7502165e3947937","abstract_canon_sha256":"92615ab3a4b27a65daacfbab214ffccdc06cd5d7252f4eafcdcc71beeb7d7e89"},"schema_version":"1.0"},"canonical_sha256":"3997429b5764101f57e4a77b08809aa1e5fc496fcfba8f611c2cbd2b49029074","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:56:29.686169Z","signature_b64":"H3FIdkd/RGO64MMDdSJCPVmjpJ+FSJr9mWfHzcXpNHmFhfRSG8JvuQiJR+2DWM6Pkeq0IgtIyjDy3KtjyRJmCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3997429b5764101f57e4a77b08809aa1e5fc496fcfba8f611c2cbd2b49029074","last_reissued_at":"2026-05-17T23:56:29.685686Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:56:29.685686Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1807.02322","source_version":5,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:56:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2A8+HKqWmrBQGOua4o/nbTRObvaDCyBKhew2f34XkgkiidCyWIF6f8q2rNSqSuipED+zRXh59fJSjGyswsgGBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T12:30:02.773199Z"},"content_sha256":"95ee9b60a156b75f62a6bea3f53362c76be5eb450c83ea344e4a80259c36d3fe","schema_version":"1.0","event_id":"sha256:95ee9b60a156b75f62a6bea3f53362c76be5eb450c83ea344e4a80259c36d3fe"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:HGLUFG2XMQIB6V7EU55QRAE2UH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Memory Augmented Policy Optimization for Program Synthesis and Semantic Parsing","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL","stat.ML"],"primary_cat":"cs.LG","authors_text":"Chen Liang, Jonathan Berant, Mohammad Norouzi, Ni Lao, Quoc Le","submitted_at":"2018-07-06T09:15:05Z","abstract_excerpt":"We present Memory Augmented Policy Optimization (MAPO), a simple and novel way to leverage a memory buffer of promising trajectories to reduce the variance of policy gradient estimate. MAPO is applicable to deterministic environments with discrete actions, such as structured prediction and combinatorial optimization tasks. We express the expected return objective as a weighted sum of two terms: an expectation over the high-reward trajectories inside the memory buffer, and a separate expectation over trajectories outside the buffer. To make an efficient algorithm of MAPO, we propose: (1) memory"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.02322","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:56:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QFhbDDJMzvum92d+vy1ET4NXp56AXzPK3CDM0qDB3Dbg7oB1CjR2Z3gNMuF2fLk768T2i26heC4KopAntROkCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T12:30:02.773572Z"},"content_sha256":"2d1a13993fd0f3af6616c56d50db37744b59c31cd200dfb1734612e1fe840318","schema_version":"1.0","event_id":"sha256:2d1a13993fd0f3af6616c56d50db37744b59c31cd200dfb1734612e1fe840318"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HGLUFG2XMQIB6V7EU55QRAE2UH/bundle.json","state_url":"https://pith.science/pith/HGLUFG2XMQIB6V7EU55QRAE2UH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HGLUFG2XMQIB6V7EU55QRAE2UH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T12:30:02Z","links":{"resolver":"https://pith.science/pith/HGLUFG2XMQIB6V7EU55QRAE2UH","bundle":"https://pith.science/pith/HGLUFG2XMQIB6V7EU55QRAE2UH/bundle.json","state":"https://pith.science/pith/HGLUFG2XMQIB6V7EU55QRAE2UH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HGLUFG2XMQIB6V7EU55QRAE2UH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:HGLUFG2XMQIB6V7EU55QRAE2UH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"92615ab3a4b27a65daacfbab214ffccdc06cd5d7252f4eafcdcc71beeb7d7e89","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-07-06T09:15:05Z","title_canon_sha256":"f04db341a290a040eb7d581083cf044718f39c40448f1c39a7502165e3947937"},"schema_version":"1.0","source":{"id":"1807.02322","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.02322","created_at":"2026-05-17T23:56:29Z"},{"alias_kind":"arxiv_version","alias_value":"1807.02322v5","created_at":"2026-05-17T23:56:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.02322","created_at":"2026-05-17T23:56:29Z"},{"alias_kind":"pith_short_12","alias_value":"HGLUFG2XMQIB","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"HGLUFG2XMQIB6V7E","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"HGLUFG2X","created_at":"2026-05-18T12:32:28Z"}],"graph_snapshots":[{"event_id":"sha256:2d1a13993fd0f3af6616c56d50db37744b59c31cd200dfb1734612e1fe840318","target":"graph","created_at":"2026-05-17T23:56:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present Memory Augmented Policy Optimization (MAPO), a simple and novel way to leverage a memory buffer of promising trajectories to reduce the variance of policy gradient estimate. MAPO is applicable to deterministic environments with discrete actions, such as structured prediction and combinatorial optimization tasks. We express the expected return objective as a weighted sum of two terms: an expectation over the high-reward trajectories inside the memory buffer, and a separate expectation over trajectories outside the buffer. To make an efficient algorithm of MAPO, we propose: (1) memory","authors_text":"Chen Liang, Jonathan Berant, Mohammad Norouzi, Ni Lao, Quoc Le","cross_cats":["cs.AI","cs.CL","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-07-06T09:15:05Z","title":"Memory Augmented Policy Optimization for Program Synthesis and Semantic Parsing"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.02322","kind":"arxiv","version":5},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:95ee9b60a156b75f62a6bea3f53362c76be5eb450c83ea344e4a80259c36d3fe","target":"record","created_at":"2026-05-17T23:56:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"92615ab3a4b27a65daacfbab214ffccdc06cd5d7252f4eafcdcc71beeb7d7e89","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-07-06T09:15:05Z","title_canon_sha256":"f04db341a290a040eb7d581083cf044718f39c40448f1c39a7502165e3947937"},"schema_version":"1.0","source":{"id":"1807.02322","kind":"arxiv","version":5}},"canonical_sha256":"3997429b5764101f57e4a77b08809aa1e5fc496fcfba8f611c2cbd2b49029074","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3997429b5764101f57e4a77b08809aa1e5fc496fcfba8f611c2cbd2b49029074","first_computed_at":"2026-05-17T23:56:29.685686Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:56:29.685686Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"H3FIdkd/RGO64MMDdSJCPVmjpJ+FSJr9mWfHzcXpNHmFhfRSG8JvuQiJR+2DWM6Pkeq0IgtIyjDy3KtjyRJmCQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:56:29.686169Z","signed_message":"canonical_sha256_bytes"},"source_id":"1807.02322","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:95ee9b60a156b75f62a6bea3f53362c76be5eb450c83ea344e4a80259c36d3fe","sha256:2d1a13993fd0f3af6616c56d50db37744b59c31cd200dfb1734612e1fe840318"],"state_sha256":"f0fca2ef7852c1340deefba317a14894221e3d5e7602a315ea501fb053ff06f9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NokliUQbAcQKA9/0YAY6S5GaDnJHBQsyx0WACxiqh2puIXKcvNOxPHAQeHyOaJW5CNfQ/PpFQ74eNeFkLD8qDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T12:30:02.775639Z","bundle_sha256":"840d8e88e80ede78de1b887615d2b97394dfda92c0fdd2ab680b07cec7bf2f6e"}}