{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:H2SJ2XXZYBWFBUGQ6IZCTHSCWH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9877ff221b43811ec9def19602d811a6355f6ffd8653d1045b6c9f80570caea6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","title_canon_sha256":"d7c3fddce558c98d3a038a74daf9a6dc9f160a761f609648c63424181b7a1c58"},"schema_version":"1.0","source":{"id":"2603.23461","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.23461","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"arxiv_version","alias_value":"2603.23461v2","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.23461","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_12","alias_value":"H2SJ2XXZYBWF","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_16","alias_value":"H2SJ2XXZYBWFBUGQ","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_8","alias_value":"H2SJ2XXZ","created_at":"2026-07-01T01:18:24Z"}],"graph_snapshots":[{"event_id":"sha256:0e1ee793a065b465182b23037b26f808ed5e159841714ca1c50e8e7eba13a3a5","target":"graph","created_at":"2026-07-01T01:18:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.23461/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We study reinforcement learning (RL) with linear function approximation in Markov Decision Processes (MDPs) satisfying \\emph{linear Bellman completeness} -- a fundamental setting where the Bellman backup of any linear value function remains linear. While statistically tractable, prior computationally efficient algorithms are either limited to small action spaces or require strong oracle assumptions over the feature space. We provide a computationally efficient algorithm for linear Bellman complete MDPs with \\emph{deterministic transitions}, stochastic initial states, and stochastic rewards. Fo","authors_text":"Alexander Rakhlin, Nneka Okolo, Zakaria Mhammedi","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","title":"End-to-End Efficient RL for Linear Bellman Complete MDPs with Deterministic Transitions"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.23461","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:642641cf509c2e49765ec8432a271932c661d00a85df3c9efb55730a9f147f57","target":"record","created_at":"2026-07-01T01:18:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9877ff221b43811ec9def19602d811a6355f6ffd8653d1045b6c9f80570caea6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","title_canon_sha256":"d7c3fddce558c98d3a038a74daf9a6dc9f160a761f609648c63424181b7a1c58"},"schema_version":"1.0","source":{"id":"2603.23461","kind":"arxiv","version":2}},"canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","first_computed_at":"2026-07-01T01:18:24.547325Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-01T01:18:24.547325Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"UaaPOABxV0nNI3H5AqUshOJ3L5U6W0HOe1nPlGCoDEcNZSzYOdraDUHeeI1CXoBhElLYp6Syw8rXA+0VyUF+AA==","signature_status":"signed_v1","signed_at":"2026-07-01T01:18:24.547845Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.23461","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:642641cf509c2e49765ec8432a271932c661d00a85df3c9efb55730a9f147f57","sha256:0e1ee793a065b465182b23037b26f808ed5e159841714ca1c50e8e7eba13a3a5"],"state_sha256":"52501c22b2eddd03c9d4013d31fdb876ec23dec0553f81414d63dd234c1ba7b9"}