{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:3FUCNDNRBLAI7I7CJVZPKWTR7W","short_pith_number":"pith:3FUCNDNR","canonical_record":{"source":{"id":"2606.20411","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-18T15:58:48Z","cross_cats_sorted":[],"title_canon_sha256":"b4344d9d1e8122951aff445df0447bbfb5d169d0a57b886f862fa6893a81b9f9","abstract_canon_sha256":"af7d0f6e519c036bdda81fd53d251ba5e64ee47c0757ca3d74d336d8f34b7bb0"},"schema_version":"1.0"},"canonical_sha256":"d968268db10ac08fa3e24d72f55a71fd9ada34a0e8db0a5e69fc1b89b2aa57fe","source":{"kind":"arxiv","id":"2606.20411","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.20411","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"arxiv_version","alias_value":"2606.20411v1","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.20411","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"pith_short_12","alias_value":"3FUCNDNRBLAI","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"pith_short_16","alias_value":"3FUCNDNRBLAI7I7C","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"pith_short_8","alias_value":"3FUCNDNR","created_at":"2026-06-19T16:13:11Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:3FUCNDNRBLAI7I7CJVZPKWTR7W","target":"record","payload":{"canonical_record":{"source":{"id":"2606.20411","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-18T15:58:48Z","cross_cats_sorted":[],"title_canon_sha256":"b4344d9d1e8122951aff445df0447bbfb5d169d0a57b886f862fa6893a81b9f9","abstract_canon_sha256":"af7d0f6e519c036bdda81fd53d251ba5e64ee47c0757ca3d74d336d8f34b7bb0"},"schema_version":"1.0"},"canonical_sha256":"d968268db10ac08fa3e24d72f55a71fd9ada34a0e8db0a5e69fc1b89b2aa57fe","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:13:11.825125Z","signature_b64":"bvq9boiUVGxW2othYVy06Q5ERoXprkysU6QMTBw2mhf2XNJON9fBLRVS2HSQTP3iZXxfzWco9D8OAS2kRNXTCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d968268db10ac08fa3e24d72f55a71fd9ada34a0e8db0a5e69fc1b89b2aa57fe","last_reissued_at":"2026-06-19T16:13:11.824760Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:13:11.824760Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.20411","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:13:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gn97uSYw+JFbXcuo7h2P3fPftY/cCPgjjApedyT223EJNC9f5hhIa/ftlqnK1ZhQLhhbGCA6NuRPsKOUmVBCDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T23:51:46.614262Z"},"content_sha256":"03b60b37b202a604115e62a01b108ae7ea38a62a1c9aff2974c30f596c9ce9c1","schema_version":"1.0","event_id":"sha256:03b60b37b202a604115e62a01b108ae7ea38a62a1c9aff2974c30f596c9ce9c1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:3FUCNDNRBLAI7I7CJVZPKWTR7W","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Direct Advantage Estimation for Scalable and Sample-efficient Deep Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bernhard Sch\\\"olkopf, Hsiao-Ru Pan","submitted_at":"2026-06-18T15:58:48Z","abstract_excerpt":"Direct Advantage Estimation (DAE) has been shown to improve the sample efficiency of deep reinforcement learning algorithms. However, its reliance on full environment observability limits its applicability in realistic settings, and its requirement to model transition probabilities incurs substantial computational overhead for high-dimensional observations. In the present work, we address both limitations. First, we extend the theoretical framework of DAE to partially observable domains with minimal modifications. Second, we reduce its computational complexity by introducing discrete latent dy"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.20411","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.20411/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:13:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5gigrYJcO8VTulZZ5MED//5RZDMLVByc8nIb2LlTHw8ubHtKXnhJzLIhyyOJj7wgt7iNi9qgi8kfXrAIwOdLAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T23:51:46.614915Z"},"content_sha256":"e31de058721c54f28c65a5bbce6f2c97e7ed3e40074868d36d12335e472ed519","schema_version":"1.0","event_id":"sha256:e31de058721c54f28c65a5bbce6f2c97e7ed3e40074868d36d12335e472ed519"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3FUCNDNRBLAI7I7CJVZPKWTR7W/bundle.json","state_url":"https://pith.science/pith/3FUCNDNRBLAI7I7CJVZPKWTR7W/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3FUCNDNRBLAI7I7CJVZPKWTR7W/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-01T23:51:46Z","links":{"resolver":"https://pith.science/pith/3FUCNDNRBLAI7I7CJVZPKWTR7W","bundle":"https://pith.science/pith/3FUCNDNRBLAI7I7CJVZPKWTR7W/bundle.json","state":"https://pith.science/pith/3FUCNDNRBLAI7I7CJVZPKWTR7W/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3FUCNDNRBLAI7I7CJVZPKWTR7W/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3FUCNDNRBLAI7I7CJVZPKWTR7W","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"af7d0f6e519c036bdda81fd53d251ba5e64ee47c0757ca3d74d336d8f34b7bb0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-18T15:58:48Z","title_canon_sha256":"b4344d9d1e8122951aff445df0447bbfb5d169d0a57b886f862fa6893a81b9f9"},"schema_version":"1.0","source":{"id":"2606.20411","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.20411","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"arxiv_version","alias_value":"2606.20411v1","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.20411","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"pith_short_12","alias_value":"3FUCNDNRBLAI","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"pith_short_16","alias_value":"3FUCNDNRBLAI7I7C","created_at":"2026-06-19T16:13:11Z"},{"alias_kind":"pith_short_8","alias_value":"3FUCNDNR","created_at":"2026-06-19T16:13:11Z"}],"graph_snapshots":[{"event_id":"sha256:e31de058721c54f28c65a5bbce6f2c97e7ed3e40074868d36d12335e472ed519","target":"graph","created_at":"2026-06-19T16:13:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.20411/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Direct Advantage Estimation (DAE) has been shown to improve the sample efficiency of deep reinforcement learning algorithms. However, its reliance on full environment observability limits its applicability in realistic settings, and its requirement to model transition probabilities incurs substantial computational overhead for high-dimensional observations. In the present work, we address both limitations. First, we extend the theoretical framework of DAE to partially observable domains with minimal modifications. Second, we reduce its computational complexity by introducing discrete latent dy","authors_text":"Bernhard Sch\\\"olkopf, Hsiao-Ru Pan","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-18T15:58:48Z","title":"Direct Advantage Estimation for Scalable and Sample-efficient Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.20411","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:03b60b37b202a604115e62a01b108ae7ea38a62a1c9aff2974c30f596c9ce9c1","target":"record","created_at":"2026-06-19T16:13:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"af7d0f6e519c036bdda81fd53d251ba5e64ee47c0757ca3d74d336d8f34b7bb0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-18T15:58:48Z","title_canon_sha256":"b4344d9d1e8122951aff445df0447bbfb5d169d0a57b886f862fa6893a81b9f9"},"schema_version":"1.0","source":{"id":"2606.20411","kind":"arxiv","version":1}},"canonical_sha256":"d968268db10ac08fa3e24d72f55a71fd9ada34a0e8db0a5e69fc1b89b2aa57fe","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d968268db10ac08fa3e24d72f55a71fd9ada34a0e8db0a5e69fc1b89b2aa57fe","first_computed_at":"2026-06-19T16:13:11.824760Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:13:11.824760Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bvq9boiUVGxW2othYVy06Q5ERoXprkysU6QMTBw2mhf2XNJON9fBLRVS2HSQTP3iZXxfzWco9D8OAS2kRNXTCg==","signature_status":"signed_v1","signed_at":"2026-06-19T16:13:11.825125Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.20411","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:03b60b37b202a604115e62a01b108ae7ea38a62a1c9aff2974c30f596c9ce9c1","sha256:e31de058721c54f28c65a5bbce6f2c97e7ed3e40074868d36d12335e472ed519"],"state_sha256":"45711c1fe6486d16edaef4c895a6b90a9770700b372e001260e1aaebb1733ae1"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LZUuQoXNoLxbm58pTDbWIRgpP6K0a06ncK3OeGejlduZNEx43DxmI1DHhkEqN3Xz1xc+v9GYP9YCGFhrxrY2CA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-01T23:51:46.618368Z","bundle_sha256":"d7aae980e0809da0343dcad6691a2d7d94c6651f6000bfe6b17e02a97ae56f84"}}