{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:EF5LBCQGIOMYR7EKU522MJ3RM4","short_pith_number":"pith:EF5LBCQG","canonical_record":{"source":{"id":"1808.01960","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-06T15:22:13Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"70d49c2f79cb09977d6f5db1e074ba57b931c5b30b0b4780f3b7d8113e59e92b","abstract_canon_sha256":"d622e5a6ca575e249431e7036bc14728d0c41448ac8daa221d9017d706687372"},"schema_version":"1.0"},"canonical_sha256":"217ab08a06439988fc8aa775a6277167151c18d7db63736f5efa8c739eaac725","source":{"kind":"arxiv","id":"1808.01960","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1808.01960","created_at":"2026-05-18T00:08:49Z"},{"alias_kind":"arxiv_version","alias_value":"1808.01960v1","created_at":"2026-05-18T00:08:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.01960","created_at":"2026-05-18T00:08:49Z"},{"alias_kind":"pith_short_12","alias_value":"EF5LBCQGIOMY","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"EF5LBCQGIOMYR7EK","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"EF5LBCQG","created_at":"2026-05-18T12:32:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:EF5LBCQGIOMYR7EKU522MJ3RM4","target":"record","payload":{"canonical_record":{"source":{"id":"1808.01960","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-06T15:22:13Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"70d49c2f79cb09977d6f5db1e074ba57b931c5b30b0b4780f3b7d8113e59e92b","abstract_canon_sha256":"d622e5a6ca575e249431e7036bc14728d0c41448ac8daa221d9017d706687372"},"schema_version":"1.0"},"canonical_sha256":"217ab08a06439988fc8aa775a6277167151c18d7db63736f5efa8c739eaac725","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:08:49.066703Z","signature_b64":"3N2n9Y6GsdNx1RGC1uhNIx5jSEuB+TI15R+TfxZ9zwSwZ20qE/lSzjeOtUWprLONZ5Xj1kpgOOlB5MY6Xk6HCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"217ab08a06439988fc8aa775a6277167151c18d7db63736f5efa8c739eaac725","last_reissued_at":"2026-05-18T00:08:49.066070Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:08:49.066070Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1808.01960","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:08:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ssf9rxNalpDOir8qdQx7eWSW+0CP0SJfqjR8UXSWeXX/Gnr9wxbU95o5z/gM3MeL7aifxWMEmf6Y5WocnfOXAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-25T07:19:25.863914Z"},"content_sha256":"47edb54e6ed794071cda34137ef82312654878f33889353fe8a7ee80263cd5f7","schema_version":"1.0","event_id":"sha256:47edb54e6ed794071cda34137ef82312654878f33889353fe8a7ee80263cd5f7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:EF5LBCQGIOMYR7EKU522MJ3RM4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Distributional Multivariate Policy Evaluation and Exploration with the Bellman GAN","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Aviv Tamar, Dror Freirich, Ron Meir","submitted_at":"2018-08-06T15:22:13Z","abstract_excerpt":"The recently proposed distributional approach to reinforcement learning (DiRL) is centered on learning the distribution of the reward-to-go, often referred to as the value distribution. In this work, we show that the distributional Bellman equation, which drives DiRL methods, is equivalent to a generative adversarial network (GAN) model. In this formulation, DiRL can be seen as learning a deep generative model of the value distribution, driven by the discrepancy between the distribution of the current value, and the distribution of the sum of current reward and next value. We use this insight "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.01960","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:08:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mafOYUDf5KJ+ZXg4MJuBXFXIGoNpZ2HGEecr0coz71ByMnYy60u+SamRTQP3OJA9iJZRPrwGFDVSKGpJSSDcAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-25T07:19:25.864250Z"},"content_sha256":"27d4447b73a1250e7e0ed681483ec5537c4b375a398e124399f44732f31a1221","schema_version":"1.0","event_id":"sha256:27d4447b73a1250e7e0ed681483ec5537c4b375a398e124399f44732f31a1221"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EF5LBCQGIOMYR7EKU522MJ3RM4/bundle.json","state_url":"https://pith.science/pith/EF5LBCQGIOMYR7EKU522MJ3RM4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EF5LBCQGIOMYR7EKU522MJ3RM4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-25T07:19:25Z","links":{"resolver":"https://pith.science/pith/EF5LBCQGIOMYR7EKU522MJ3RM4","bundle":"https://pith.science/pith/EF5LBCQGIOMYR7EKU522MJ3RM4/bundle.json","state":"https://pith.science/pith/EF5LBCQGIOMYR7EKU522MJ3RM4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EF5LBCQGIOMYR7EKU522MJ3RM4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:EF5LBCQGIOMYR7EKU522MJ3RM4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d622e5a6ca575e249431e7036bc14728d0c41448ac8daa221d9017d706687372","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-06T15:22:13Z","title_canon_sha256":"70d49c2f79cb09977d6f5db1e074ba57b931c5b30b0b4780f3b7d8113e59e92b"},"schema_version":"1.0","source":{"id":"1808.01960","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1808.01960","created_at":"2026-05-18T00:08:49Z"},{"alias_kind":"arxiv_version","alias_value":"1808.01960v1","created_at":"2026-05-18T00:08:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.01960","created_at":"2026-05-18T00:08:49Z"},{"alias_kind":"pith_short_12","alias_value":"EF5LBCQGIOMY","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"EF5LBCQGIOMYR7EK","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"EF5LBCQG","created_at":"2026-05-18T12:32:22Z"}],"graph_snapshots":[{"event_id":"sha256:27d4447b73a1250e7e0ed681483ec5537c4b375a398e124399f44732f31a1221","target":"graph","created_at":"2026-05-18T00:08:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The recently proposed distributional approach to reinforcement learning (DiRL) is centered on learning the distribution of the reward-to-go, often referred to as the value distribution. In this work, we show that the distributional Bellman equation, which drives DiRL methods, is equivalent to a generative adversarial network (GAN) model. In this formulation, DiRL can be seen as learning a deep generative model of the value distribution, driven by the discrepancy between the distribution of the current value, and the distribution of the sum of current reward and next value. We use this insight ","authors_text":"Aviv Tamar, Dror Freirich, Ron Meir","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-06T15:22:13Z","title":"Distributional Multivariate Policy Evaluation and Exploration with the Bellman GAN"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.01960","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:47edb54e6ed794071cda34137ef82312654878f33889353fe8a7ee80263cd5f7","target":"record","created_at":"2026-05-18T00:08:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d622e5a6ca575e249431e7036bc14728d0c41448ac8daa221d9017d706687372","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-06T15:22:13Z","title_canon_sha256":"70d49c2f79cb09977d6f5db1e074ba57b931c5b30b0b4780f3b7d8113e59e92b"},"schema_version":"1.0","source":{"id":"1808.01960","kind":"arxiv","version":1}},"canonical_sha256":"217ab08a06439988fc8aa775a6277167151c18d7db63736f5efa8c739eaac725","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"217ab08a06439988fc8aa775a6277167151c18d7db63736f5efa8c739eaac725","first_computed_at":"2026-05-18T00:08:49.066070Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:08:49.066070Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3N2n9Y6GsdNx1RGC1uhNIx5jSEuB+TI15R+TfxZ9zwSwZ20qE/lSzjeOtUWprLONZ5Xj1kpgOOlB5MY6Xk6HCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:08:49.066703Z","signed_message":"canonical_sha256_bytes"},"source_id":"1808.01960","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:47edb54e6ed794071cda34137ef82312654878f33889353fe8a7ee80263cd5f7","sha256:27d4447b73a1250e7e0ed681483ec5537c4b375a398e124399f44732f31a1221"],"state_sha256":"b49e444d4daf44c5bba3fcf6b901d23498fcaac31af73b4faab4f1ee200afe49"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IjmbAoIhJEeRqmgWwa/tc22g7QhpI+6ZhEBJJUcn+5xPd3g35iMrzQIUXheddo553Xydactls3JUd6nHAj/ACw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-25T07:19:25.866200Z","bundle_sha256":"8bba5a3ddeba213056cd091baa9594328c5924e2f001bc52fe950ea89ffc0c67"}}