{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:Q3OMKDBF5UCMXSAHGGCI52EWXI","short_pith_number":"pith:Q3OMKDBF","schema_version":"1.0","canonical_sha256":"86dcc50c25ed04cbc80731848ee896ba1e307362aa28f3ac2aa7725f3d25a635","source":{"kind":"arxiv","id":"1811.06512","version":1},"attestation_state":"computed","paper":{"title":"Tight Bayesian Ambiguity Sets for Robust MDPs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Marek Petrik, Reazul Hasan Russel","submitted_at":"2018-11-15T18:18:39Z","abstract_excerpt":"Robustness is important for sequential decision making in a stochastic dynamic environment with uncertain probabilistic parameters. We address the problem of using robust MDPs (RMDPs) to compute policies with provable worst-case guarantees in reinforcement learning. The quality and robustness of an RMDP solution is determined by its ambiguity set. Existing methods construct ambiguity sets that lead to impractically conservative solutions. In this paper, we propose RSVF, which achieves less conservative solutions with the same worst-case guarantees by 1) leveraging a Bayesian prior, 2) optimizi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1811.06512","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-15T18:18:39Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"b7e4fcba72f1e6f9f757648d66277322289c139c81d6380748ccdd4b04003462","abstract_canon_sha256":"9d7385cd99e895d9072692b25a4c0f7f3d06bccb6752dfe41f11254e4544ec22"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:37.562420Z","signature_b64":"rY08l+dz8WkY3hFghqNI/IfugEion58C6vhTDE2yF8XI/sGbKv11YAEvH9Tz26pH/9C2djQEksBJQ804059nBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"86dcc50c25ed04cbc80731848ee896ba1e307362aa28f3ac2aa7725f3d25a635","last_reissued_at":"2026-05-18T00:00:37.561906Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:37.561906Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Tight Bayesian Ambiguity Sets for Robust MDPs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Marek Petrik, Reazul Hasan Russel","submitted_at":"2018-11-15T18:18:39Z","abstract_excerpt":"Robustness is important for sequential decision making in a stochastic dynamic environment with uncertain probabilistic parameters. We address the problem of using robust MDPs (RMDPs) to compute policies with provable worst-case guarantees in reinforcement learning. The quality and robustness of an RMDP solution is determined by its ambiguity set. Existing methods construct ambiguity sets that lead to impractically conservative solutions. In this paper, we propose RSVF, which achieves less conservative solutions with the same worst-case guarantees by 1) leveraging a Bayesian prior, 2) optimizi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.06512","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1811.06512","created_at":"2026-05-18T00:00:37.561985+00:00"},{"alias_kind":"arxiv_version","alias_value":"1811.06512v1","created_at":"2026-05-18T00:00:37.561985+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.06512","created_at":"2026-05-18T00:00:37.561985+00:00"},{"alias_kind":"pith_short_12","alias_value":"Q3OMKDBF5UCM","created_at":"2026-05-18T12:32:46.962924+00:00"},{"alias_kind":"pith_short_16","alias_value":"Q3OMKDBF5UCMXSAH","created_at":"2026-05-18T12:32:46.962924+00:00"},{"alias_kind":"pith_short_8","alias_value":"Q3OMKDBF","created_at":"2026-05-18T12:32:46.962924+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI","json":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI.json","graph_json":"https://pith.science/api/pith-number/Q3OMKDBF5UCMXSAHGGCI52EWXI/graph.json","events_json":"https://pith.science/api/pith-number/Q3OMKDBF5UCMXSAHGGCI52EWXI/events.json","paper":"https://pith.science/paper/Q3OMKDBF"},"agent_actions":{"view_html":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI","download_json":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI.json","view_paper":"https://pith.science/paper/Q3OMKDBF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1811.06512&json=true","fetch_graph":"https://pith.science/api/pith-number/Q3OMKDBF5UCMXSAHGGCI52EWXI/graph.json","fetch_events":"https://pith.science/api/pith-number/Q3OMKDBF5UCMXSAHGGCI52EWXI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI/action/storage_attestation","attest_author":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI/action/author_attestation","sign_citation":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI/action/citation_signature","submit_replication":"https://pith.science/pith/Q3OMKDBF5UCMXSAHGGCI52EWXI/action/replication_record"}},"created_at":"2026-05-18T00:00:37.561985+00:00","updated_at":"2026-05-18T00:00:37.561985+00:00"}