{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:CRERUYD3VYJ6DVTW4JRY5X72EW","short_pith_number":"pith:CRERUYD3","canonical_record":{"source":{"id":"1904.03535","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-06T21:50:24Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"60c085e358400a22297b2821a825c0f1e1828674095cc7804b52143b939eefa2","abstract_canon_sha256":"ea43ca2fd595ec7b1a1627cb0b6f78b121ad83496a76569428ae051db3402684"},"schema_version":"1.0"},"canonical_sha256":"14491a607bae13e1d676e2638edffa25a6ba64bc0cda33e6bf0f6588a621ae8a","source":{"kind":"arxiv","id":"1904.03535","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.03535","created_at":"2026-05-17T23:49:13Z"},{"alias_kind":"arxiv_version","alias_value":"1904.03535v1","created_at":"2026-05-17T23:49:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.03535","created_at":"2026-05-17T23:49:13Z"},{"alias_kind":"pith_short_12","alias_value":"CRERUYD3VYJ6","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"CRERUYD3VYJ6DVTW","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"CRERUYD3","created_at":"2026-05-18T12:33:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:CRERUYD3VYJ6DVTW4JRY5X72EW","target":"record","payload":{"canonical_record":{"source":{"id":"1904.03535","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-06T21:50:24Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"60c085e358400a22297b2821a825c0f1e1828674095cc7804b52143b939eefa2","abstract_canon_sha256":"ea43ca2fd595ec7b1a1627cb0b6f78b121ad83496a76569428ae051db3402684"},"schema_version":"1.0"},"canonical_sha256":"14491a607bae13e1d676e2638edffa25a6ba64bc0cda33e6bf0f6588a621ae8a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:49:13.053995Z","signature_b64":"CdZ8AtSRD17fnEawwTT4qtLG+hlaUCkKg6gMQr/6lWt5IFUB7rZiAKgZpRDLYJ27Hxz9idwzrNnv0Hu9SfVmDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"14491a607bae13e1d676e2638edffa25a6ba64bc0cda33e6bf0f6588a621ae8a","last_reissued_at":"2026-05-17T23:49:13.053305Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:49:13.053305Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.03535","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3Sp7vMciNuMV4FjG5Tvxr+C2hvHk1xzotaNNSOKs3kWInzQIXZ7CRTs8TSOmP+JWQwiQ/Poi3vvJ6/Kik4wbAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T07:27:28.500841Z"},"content_sha256":"dffee1828aa364318f954f0996a38c6f11a0245e0bd625b723fd1aa1fda24da5","schema_version":"1.0","event_id":"sha256:dffee1828aa364318f954f0996a38c6f11a0245e0bd625b723fd1aa1fda24da5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:CRERUYD3VYJ6DVTW4JRY5X72EW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Randomised Bayesian Least-Squares Policy Iteration","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Christos Dimitrakakis, Michalis Vazirgiannis, Nikolaos Tziortziotis","submitted_at":"2019-04-06T21:50:24Z","abstract_excerpt":"We introduce Bayesian least-squares policy iteration (BLSPI), an off-policy, model-free, policy iteration algorithm that uses the Bayesian least-squares temporal-difference (BLSTD) learning algorithm to evaluate policies. An online variant of BLSPI has been also proposed, called randomised BLSPI (RBLSPI), that improves its policy based on an incomplete policy evaluation step. In online setting, the exploration-exploitation dilemma should be addressed as we try to discover the optimal policy by using samples collected by ourselves. RBLSPI exploits the advantage of BLSTD to quantify our uncertai"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.03535","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:49:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MetZMpwFE7QbBJHM9ot/0MA80nH7DLszgRtuy9ufiwaf5PH5kLnEzEz4jboetR1AbMegRCa26XhyFfGfAMEqCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T07:27:28.501181Z"},"content_sha256":"bb0bf39877394011445c859617a08c416c1ee7b224684c210ac26a0f6a9843a9","schema_version":"1.0","event_id":"sha256:bb0bf39877394011445c859617a08c416c1ee7b224684c210ac26a0f6a9843a9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CRERUYD3VYJ6DVTW4JRY5X72EW/bundle.json","state_url":"https://pith.science/pith/CRERUYD3VYJ6DVTW4JRY5X72EW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CRERUYD3VYJ6DVTW4JRY5X72EW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T07:27:28Z","links":{"resolver":"https://pith.science/pith/CRERUYD3VYJ6DVTW4JRY5X72EW","bundle":"https://pith.science/pith/CRERUYD3VYJ6DVTW4JRY5X72EW/bundle.json","state":"https://pith.science/pith/CRERUYD3VYJ6DVTW4JRY5X72EW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CRERUYD3VYJ6DVTW4JRY5X72EW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:CRERUYD3VYJ6DVTW4JRY5X72EW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ea43ca2fd595ec7b1a1627cb0b6f78b121ad83496a76569428ae051db3402684","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-06T21:50:24Z","title_canon_sha256":"60c085e358400a22297b2821a825c0f1e1828674095cc7804b52143b939eefa2"},"schema_version":"1.0","source":{"id":"1904.03535","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.03535","created_at":"2026-05-17T23:49:13Z"},{"alias_kind":"arxiv_version","alias_value":"1904.03535v1","created_at":"2026-05-17T23:49:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.03535","created_at":"2026-05-17T23:49:13Z"},{"alias_kind":"pith_short_12","alias_value":"CRERUYD3VYJ6","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"CRERUYD3VYJ6DVTW","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"CRERUYD3","created_at":"2026-05-18T12:33:15Z"}],"graph_snapshots":[{"event_id":"sha256:bb0bf39877394011445c859617a08c416c1ee7b224684c210ac26a0f6a9843a9","target":"graph","created_at":"2026-05-17T23:49:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce Bayesian least-squares policy iteration (BLSPI), an off-policy, model-free, policy iteration algorithm that uses the Bayesian least-squares temporal-difference (BLSTD) learning algorithm to evaluate policies. An online variant of BLSPI has been also proposed, called randomised BLSPI (RBLSPI), that improves its policy based on an incomplete policy evaluation step. In online setting, the exploration-exploitation dilemma should be addressed as we try to discover the optimal policy by using samples collected by ourselves. RBLSPI exploits the advantage of BLSTD to quantify our uncertai","authors_text":"Christos Dimitrakakis, Michalis Vazirgiannis, Nikolaos Tziortziotis","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-06T21:50:24Z","title":"Randomised Bayesian Least-Squares Policy Iteration"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.03535","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dffee1828aa364318f954f0996a38c6f11a0245e0bd625b723fd1aa1fda24da5","target":"record","created_at":"2026-05-17T23:49:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ea43ca2fd595ec7b1a1627cb0b6f78b121ad83496a76569428ae051db3402684","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-06T21:50:24Z","title_canon_sha256":"60c085e358400a22297b2821a825c0f1e1828674095cc7804b52143b939eefa2"},"schema_version":"1.0","source":{"id":"1904.03535","kind":"arxiv","version":1}},"canonical_sha256":"14491a607bae13e1d676e2638edffa25a6ba64bc0cda33e6bf0f6588a621ae8a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"14491a607bae13e1d676e2638edffa25a6ba64bc0cda33e6bf0f6588a621ae8a","first_computed_at":"2026-05-17T23:49:13.053305Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:49:13.053305Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"CdZ8AtSRD17fnEawwTT4qtLG+hlaUCkKg6gMQr/6lWt5IFUB7rZiAKgZpRDLYJ27Hxz9idwzrNnv0Hu9SfVmDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:49:13.053995Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.03535","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dffee1828aa364318f954f0996a38c6f11a0245e0bd625b723fd1aa1fda24da5","sha256:bb0bf39877394011445c859617a08c416c1ee7b224684c210ac26a0f6a9843a9"],"state_sha256":"b1778edbc16dd7198e55ca22b1a2e1080b66482a346a37436355208f3ed10a77"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IiZRfJzwDXCm2zrvT+duMhzgAm1iHuy9pn52f6aOAdxMNNT8GNniIgnb1r2mVw7XEwax/SZiXMLOZf6gUJ/lCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T07:27:28.503163Z","bundle_sha256":"1b02d7ab4164b9bce1cdb36f7691d64b743bcb8c93427977cb5a9db7372cc3b9"}}