{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:H2SJ2XXZYBWFBUGQ6IZCTHSCWH","short_pith_number":"pith:H2SJ2XXZ","canonical_record":{"source":{"id":"2603.23461","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","cross_cats_sorted":[],"title_canon_sha256":"d7c3fddce558c98d3a038a74daf9a6dc9f160a761f609648c63424181b7a1c58","abstract_canon_sha256":"9877ff221b43811ec9def19602d811a6355f6ffd8653d1045b6c9f80570caea6"},"schema_version":"1.0"},"canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","source":{"kind":"arxiv","id":"2603.23461","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.23461","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"arxiv_version","alias_value":"2603.23461v2","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.23461","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_12","alias_value":"H2SJ2XXZYBWF","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_16","alias_value":"H2SJ2XXZYBWFBUGQ","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_8","alias_value":"H2SJ2XXZ","created_at":"2026-07-01T01:18:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:H2SJ2XXZYBWFBUGQ6IZCTHSCWH","target":"record","payload":{"canonical_record":{"source":{"id":"2603.23461","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","cross_cats_sorted":[],"title_canon_sha256":"d7c3fddce558c98d3a038a74daf9a6dc9f160a761f609648c63424181b7a1c58","abstract_canon_sha256":"9877ff221b43811ec9def19602d811a6355f6ffd8653d1045b6c9f80570caea6"},"schema_version":"1.0"},"canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-01T01:18:24.547845Z","signature_b64":"UaaPOABxV0nNI3H5AqUshOJ3L5U6W0HOe1nPlGCoDEcNZSzYOdraDUHeeI1CXoBhElLYp6Syw8rXA+0VyUF+AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","last_reissued_at":"2026-07-01T01:18:24.547325Z","signature_status":"signed_v1","first_computed_at":"2026-07-01T01:18:24.547325Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.23461","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-01T01:18:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gUSMQa/PD6GEBSZuhbSlpFukdef9PBB9gzJj7b6Qm4UFRwsbuM7V3ax69VXQ/ke/KM5WdQd149LA+G0gVuPCAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T12:32:14.668109Z"},"content_sha256":"642641cf509c2e49765ec8432a271932c661d00a85df3c9efb55730a9f147f57","schema_version":"1.0","event_id":"sha256:642641cf509c2e49765ec8432a271932c661d00a85df3c9efb55730a9f147f57"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:H2SJ2XXZYBWFBUGQ6IZCTHSCWH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"End-to-End Efficient RL for Linear Bellman Complete MDPs with Deterministic Transitions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Alexander Rakhlin, Nneka Okolo, Zakaria Mhammedi","submitted_at":"2026-03-24T17:32:29Z","abstract_excerpt":"We study reinforcement learning (RL) with linear function approximation in Markov Decision Processes (MDPs) satisfying \\emph{linear Bellman completeness} -- a fundamental setting where the Bellman backup of any linear value function remains linear. While statistically tractable, prior computationally efficient algorithms are either limited to small action spaces or require strong oracle assumptions over the feature space. We provide a computationally efficient algorithm for linear Bellman complete MDPs with \\emph{deterministic transitions}, stochastic initial states, and stochastic rewards. Fo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.23461","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.23461/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-01T01:18:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PmeennY1kjHgwVpvCYbR049dA3b0ZKp6I8QC+gNKFkf/R0l6ifGX27LgQAd2vORODUQ1bDjojfeMyLNZU2XwDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T12:32:14.668492Z"},"content_sha256":"0e1ee793a065b465182b23037b26f808ed5e159841714ca1c50e8e7eba13a3a5","schema_version":"1.0","event_id":"sha256:0e1ee793a065b465182b23037b26f808ed5e159841714ca1c50e8e7eba13a3a5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/bundle.json","state_url":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T12:32:14Z","links":{"resolver":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH","bundle":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/bundle.json","state":"https://pith.science/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/H2SJ2XXZYBWFBUGQ6IZCTHSCWH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:H2SJ2XXZYBWFBUGQ6IZCTHSCWH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9877ff221b43811ec9def19602d811a6355f6ffd8653d1045b6c9f80570caea6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","title_canon_sha256":"d7c3fddce558c98d3a038a74daf9a6dc9f160a761f609648c63424181b7a1c58"},"schema_version":"1.0","source":{"id":"2603.23461","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.23461","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"arxiv_version","alias_value":"2603.23461v2","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.23461","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_12","alias_value":"H2SJ2XXZYBWF","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_16","alias_value":"H2SJ2XXZYBWFBUGQ","created_at":"2026-07-01T01:18:24Z"},{"alias_kind":"pith_short_8","alias_value":"H2SJ2XXZ","created_at":"2026-07-01T01:18:24Z"}],"graph_snapshots":[{"event_id":"sha256:0e1ee793a065b465182b23037b26f808ed5e159841714ca1c50e8e7eba13a3a5","target":"graph","created_at":"2026-07-01T01:18:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.23461/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We study reinforcement learning (RL) with linear function approximation in Markov Decision Processes (MDPs) satisfying \\emph{linear Bellman completeness} -- a fundamental setting where the Bellman backup of any linear value function remains linear. While statistically tractable, prior computationally efficient algorithms are either limited to small action spaces or require strong oracle assumptions over the feature space. We provide a computationally efficient algorithm for linear Bellman complete MDPs with \\emph{deterministic transitions}, stochastic initial states, and stochastic rewards. Fo","authors_text":"Alexander Rakhlin, Nneka Okolo, Zakaria Mhammedi","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","title":"End-to-End Efficient RL for Linear Bellman Complete MDPs with Deterministic Transitions"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.23461","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:642641cf509c2e49765ec8432a271932c661d00a85df3c9efb55730a9f147f57","target":"record","created_at":"2026-07-01T01:18:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9877ff221b43811ec9def19602d811a6355f6ffd8653d1045b6c9f80570caea6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-24T17:32:29Z","title_canon_sha256":"d7c3fddce558c98d3a038a74daf9a6dc9f160a761f609648c63424181b7a1c58"},"schema_version":"1.0","source":{"id":"2603.23461","kind":"arxiv","version":2}},"canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3ea49d5ef9c06c50d0d0f232299e42b1db1591a4137123a023f3727f40fcf9b3","first_computed_at":"2026-07-01T01:18:24.547325Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-01T01:18:24.547325Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"UaaPOABxV0nNI3H5AqUshOJ3L5U6W0HOe1nPlGCoDEcNZSzYOdraDUHeeI1CXoBhElLYp6Syw8rXA+0VyUF+AA==","signature_status":"signed_v1","signed_at":"2026-07-01T01:18:24.547845Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.23461","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:642641cf509c2e49765ec8432a271932c661d00a85df3c9efb55730a9f147f57","sha256:0e1ee793a065b465182b23037b26f808ed5e159841714ca1c50e8e7eba13a3a5"],"state_sha256":"52501c22b2eddd03c9d4013d31fdb876ec23dec0553f81414d63dd234c1ba7b9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UAW+6NRi6OEcI23SOdxn+6+eGdxTByQ2EcQSGaUF/JZGv1yCPUZ7XBIwoVo3R28zT59OtvUz6e3HyIKua8w5AQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T12:32:14.670495Z","bundle_sha256":"8cd81a4d3e9b667be594dcbfd400c726122e3d33784f8419c2c50ebf08ef654a"}}