{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:PYK2EXYIGMLBLJUYFEUTNIQ4GG","short_pith_number":"pith:PYK2EXYI","canonical_record":{"source":{"id":"1205.3054","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-05-14T15:01:31Z","cross_cats_sorted":[],"title_canon_sha256":"9fb72c1adf0a2f9ecc62be179bfe84de485053698408a2edd2531a3d2a91c014","abstract_canon_sha256":"ba612c5b06ef15d4a4ae92fcf68301f0aa95af8503d2b9638c187cf2c89be9b3"},"schema_version":"1.0"},"canonical_sha256":"7e15a25f08331615a698292936a21c3191f06a4a0d919e808ed1ccc9b0c934f9","source":{"kind":"arxiv","id":"1205.3054","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1205.3054","created_at":"2026-05-18T03:55:22Z"},{"alias_kind":"arxiv_version","alias_value":"1205.3054v2","created_at":"2026-05-18T03:55:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1205.3054","created_at":"2026-05-18T03:55:22Z"},{"alias_kind":"pith_short_12","alias_value":"PYK2EXYIGMLB","created_at":"2026-05-18T12:27:18Z"},{"alias_kind":"pith_short_16","alias_value":"PYK2EXYIGMLBLJUY","created_at":"2026-05-18T12:27:18Z"},{"alias_kind":"pith_short_8","alias_value":"PYK2EXYI","created_at":"2026-05-18T12:27:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:PYK2EXYIGMLBLJUYFEUTNIQ4GG","target":"record","payload":{"canonical_record":{"source":{"id":"1205.3054","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-05-14T15:01:31Z","cross_cats_sorted":[],"title_canon_sha256":"9fb72c1adf0a2f9ecc62be179bfe84de485053698408a2edd2531a3d2a91c014","abstract_canon_sha256":"ba612c5b06ef15d4a4ae92fcf68301f0aa95af8503d2b9638c187cf2c89be9b3"},"schema_version":"1.0"},"canonical_sha256":"7e15a25f08331615a698292936a21c3191f06a4a0d919e808ed1ccc9b0c934f9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:55:22.975079Z","signature_b64":"0eODfVJ465sQKhae8v7zCQiwI79+zeMOQILPzLPAp8heYcDCZhsyzv+Y1AA2Ro7lvYafWr4lJXWAk2+1TQ0+Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7e15a25f08331615a698292936a21c3191f06a4a0d919e808ed1ccc9b0c934f9","last_reissued_at":"2026-05-18T03:55:22.974472Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:55:22.974472Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1205.3054","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:55:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RE04ZvTPvAqhGJfc1fHNLpsrkXYM5MHlNxVJc4U+yXhAH4gEiVaDDTHTJ80A534XEV/CEacrc1MVVzsVuqUNAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-22T12:46:57.444177Z"},"content_sha256":"f058d9509062c3eb4e783e8ee9094c433c2020c993067bde333153b7d173ca47","schema_version":"1.0","event_id":"sha256:f058d9509062c3eb4e783e8ee9094c433c2020c993067bde333153b7d173ca47"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:PYK2EXYIGMLBLJUYFEUTNIQ4GG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Approximate Modified Policy Iteration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Bruno Scherrer (INRIA Lorraine - LORIA), Matthieu Geist (UMI2958), Mohammad Ghavamzadeh (INRIA Lille - Nord Europe), Victor Gabillon (INRIA Lille - Nord Europe)","submitted_at":"2012-05-14T15:01:31Z","abstract_excerpt":"Modified policy iteration (MPI) is a dynamic programming (DP) algorithm that contains the two celebrated policy and value iteration methods. Despite its generality, MPI has not been thoroughly studied, especially its approximation form which is used when the state and/or action spaces are large or infinite. In this paper, we propose three implementations of approximate MPI (AMPI) that are extensions of well-known approximate DP algorithms: fitted-value iteration, fitted-Q iteration, and classification-based policy iteration. We provide error propagation analyses that unify those for approximat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1205.3054","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:55:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"R2KGbjqeM5KtNcx5Wo0ww0t9mqFbbFkfiDkMOGtmAK/m9oEiDjR+iFe8MVfYCqV8NpdAhXicPNyP+it5vSpRCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-22T12:46:57.444544Z"},"content_sha256":"0f916f2c08daced58cff004560f1f009b2c1492c30383a9e28c0c47b61637732","schema_version":"1.0","event_id":"sha256:0f916f2c08daced58cff004560f1f009b2c1492c30383a9e28c0c47b61637732"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PYK2EXYIGMLBLJUYFEUTNIQ4GG/bundle.json","state_url":"https://pith.science/pith/PYK2EXYIGMLBLJUYFEUTNIQ4GG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PYK2EXYIGMLBLJUYFEUTNIQ4GG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-22T12:46:57Z","links":{"resolver":"https://pith.science/pith/PYK2EXYIGMLBLJUYFEUTNIQ4GG","bundle":"https://pith.science/pith/PYK2EXYIGMLBLJUYFEUTNIQ4GG/bundle.json","state":"https://pith.science/pith/PYK2EXYIGMLBLJUYFEUTNIQ4GG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PYK2EXYIGMLBLJUYFEUTNIQ4GG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:PYK2EXYIGMLBLJUYFEUTNIQ4GG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ba612c5b06ef15d4a4ae92fcf68301f0aa95af8503d2b9638c187cf2c89be9b3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-05-14T15:01:31Z","title_canon_sha256":"9fb72c1adf0a2f9ecc62be179bfe84de485053698408a2edd2531a3d2a91c014"},"schema_version":"1.0","source":{"id":"1205.3054","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1205.3054","created_at":"2026-05-18T03:55:22Z"},{"alias_kind":"arxiv_version","alias_value":"1205.3054v2","created_at":"2026-05-18T03:55:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1205.3054","created_at":"2026-05-18T03:55:22Z"},{"alias_kind":"pith_short_12","alias_value":"PYK2EXYIGMLB","created_at":"2026-05-18T12:27:18Z"},{"alias_kind":"pith_short_16","alias_value":"PYK2EXYIGMLBLJUY","created_at":"2026-05-18T12:27:18Z"},{"alias_kind":"pith_short_8","alias_value":"PYK2EXYI","created_at":"2026-05-18T12:27:18Z"}],"graph_snapshots":[{"event_id":"sha256:0f916f2c08daced58cff004560f1f009b2c1492c30383a9e28c0c47b61637732","target":"graph","created_at":"2026-05-18T03:55:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Modified policy iteration (MPI) is a dynamic programming (DP) algorithm that contains the two celebrated policy and value iteration methods. Despite its generality, MPI has not been thoroughly studied, especially its approximation form which is used when the state and/or action spaces are large or infinite. In this paper, we propose three implementations of approximate MPI (AMPI) that are extensions of well-known approximate DP algorithms: fitted-value iteration, fitted-Q iteration, and classification-based policy iteration. We provide error propagation analyses that unify those for approximat","authors_text":"Bruno Scherrer (INRIA Lorraine - LORIA), Matthieu Geist (UMI2958), Mohammad Ghavamzadeh (INRIA Lille - Nord Europe), Victor Gabillon (INRIA Lille - Nord Europe)","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-05-14T15:01:31Z","title":"Approximate Modified Policy Iteration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1205.3054","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f058d9509062c3eb4e783e8ee9094c433c2020c993067bde333153b7d173ca47","target":"record","created_at":"2026-05-18T03:55:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ba612c5b06ef15d4a4ae92fcf68301f0aa95af8503d2b9638c187cf2c89be9b3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-05-14T15:01:31Z","title_canon_sha256":"9fb72c1adf0a2f9ecc62be179bfe84de485053698408a2edd2531a3d2a91c014"},"schema_version":"1.0","source":{"id":"1205.3054","kind":"arxiv","version":2}},"canonical_sha256":"7e15a25f08331615a698292936a21c3191f06a4a0d919e808ed1ccc9b0c934f9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7e15a25f08331615a698292936a21c3191f06a4a0d919e808ed1ccc9b0c934f9","first_computed_at":"2026-05-18T03:55:22.974472Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:55:22.974472Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0eODfVJ465sQKhae8v7zCQiwI79+zeMOQILPzLPAp8heYcDCZhsyzv+Y1AA2Ro7lvYafWr4lJXWAk2+1TQ0+Ag==","signature_status":"signed_v1","signed_at":"2026-05-18T03:55:22.975079Z","signed_message":"canonical_sha256_bytes"},"source_id":"1205.3054","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f058d9509062c3eb4e783e8ee9094c433c2020c993067bde333153b7d173ca47","sha256:0f916f2c08daced58cff004560f1f009b2c1492c30383a9e28c0c47b61637732"],"state_sha256":"b1d622d07daa6cdd2cff498d5e1b90a6f83b8d59672868ab853d5cd6a2010cca"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JAc+6lAweF/c1HjgNfwSeMKATB8906vPpkuP3RQLtAb+eaxvkKBF6796Py3fWXZg7tPhNKyz5YKoJH1QRTSTAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-22T12:46:57.446791Z","bundle_sha256":"72fab89ebc5f08657de86c2bf63214d6261d848237fdc86ba09c64c99527d452"}}