{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:57EGOQ3X6J6I6JCHHEKGGSZPYS","short_pith_number":"pith:57EGOQ3X","canonical_record":{"source":{"id":"2502.08938","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-13T03:38:41Z","cross_cats_sorted":[],"title_canon_sha256":"b7fd0075bb834a3793f598115ec0c454caac6a2dbb8d60248af748a4af6dc0a2","abstract_canon_sha256":"8482854ce6eb49178ce34a9915880308da2d387cff50e46b256e3a201d09f454"},"schema_version":"1.0"},"canonical_sha256":"efc8674377f27c8f24473914634b2fc4912547ce6e0622d6d850e182c2b1e0ad","source":{"kind":"arxiv","id":"2502.08938","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2502.08938","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"arxiv_version","alias_value":"2502.08938v4","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.08938","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"pith_short_12","alias_value":"57EGOQ3X6J6I","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"pith_short_16","alias_value":"57EGOQ3X6J6I6JCH","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"pith_short_8","alias_value":"57EGOQ3X","created_at":"2026-05-28T01:04:26Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:57EGOQ3X6J6I6JCHHEKGGSZPYS","target":"record","payload":{"canonical_record":{"source":{"id":"2502.08938","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-13T03:38:41Z","cross_cats_sorted":[],"title_canon_sha256":"b7fd0075bb834a3793f598115ec0c454caac6a2dbb8d60248af748a4af6dc0a2","abstract_canon_sha256":"8482854ce6eb49178ce34a9915880308da2d387cff50e46b256e3a201d09f454"},"schema_version":"1.0"},"canonical_sha256":"efc8674377f27c8f24473914634b2fc4912547ce6e0622d6d850e182c2b1e0ad","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:26.711463Z","signature_b64":"9X1kf6NFgzEqVR2JY1fcxs/Ai2G1bxviUrH8/006BCuHCYbGkJFiSqRvRzNZSOcRC0rbOkdsD8elS1XgrhJNAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"efc8674377f27c8f24473914634b2fc4912547ce6e0622d6d850e182c2b1e0ad","last_reissued_at":"2026-05-28T01:04:26.710880Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:26.710880Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2502.08938","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"X33MRwd3FfRVqdj/2XVbKqQNaLglA9cRF153dehRgzYYL4DOxPo6lsT1qRPKfhyZ5n+TSBLx5v9NabYFW3qvAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T15:07:59.749584Z"},"content_sha256":"f4da4fd30d0683c58e56cce0523c720e6b1e2c8094db626695e13e17eb0d521c","schema_version":"1.0","event_id":"sha256:f4da4fd30d0683c58e56cce0523c720e6b1e2c8094db626695e13e17eb0d521c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:57EGOQ3X6J6I6JCHHEKGGSZPYS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reevaluating Policy Gradient Methods for Imperfect-Information Games","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Alexandre Bayen, Amy Zhang, Eugene Vinitsky, Gabriele Farina, J. Zico Kolter, Max Rudolph, Nathan Lichtle, Samuel Sokota, Sobhan Mohammadpour","submitted_at":"2025-02-13T03:38:41Z","abstract_excerpt":"In the past decade, motivated by the putative failure of naive self-play deep reinforcement learning (DRL) in adversarial imperfect-information games, researchers have developed numerous DRL algorithms based on fictitious play (FP), double oracle (DO), and counterfactual regret minimization (CFR). In light of recent results of the magnetic mirror descent algorithm, we hypothesize that simpler generic policy gradient methods like PPO are competitive with or superior to these FP-, DO-, and CFR-based DRL approaches. To facilitate the resolution of this hypothesis, we implement and release the fir"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.08938","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2502.08938/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wghmkAGja/wEbo7CdNY6E6gEIlqkM0SOjm0v971pvnufVNirHZlnvudJ3ygzqDlGz7ra24D95K9ElGXKT3ZmDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T15:07:59.750409Z"},"content_sha256":"c515594ee647ccf0b3121df6c5512b62100496c0c39d4046e755a5f4e54e6c95","schema_version":"1.0","event_id":"sha256:c515594ee647ccf0b3121df6c5512b62100496c0c39d4046e755a5f4e54e6c95"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/57EGOQ3X6J6I6JCHHEKGGSZPYS/bundle.json","state_url":"https://pith.science/pith/57EGOQ3X6J6I6JCHHEKGGSZPYS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/57EGOQ3X6J6I6JCHHEKGGSZPYS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T15:07:59Z","links":{"resolver":"https://pith.science/pith/57EGOQ3X6J6I6JCHHEKGGSZPYS","bundle":"https://pith.science/pith/57EGOQ3X6J6I6JCHHEKGGSZPYS/bundle.json","state":"https://pith.science/pith/57EGOQ3X6J6I6JCHHEKGGSZPYS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/57EGOQ3X6J6I6JCHHEKGGSZPYS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:57EGOQ3X6J6I6JCHHEKGGSZPYS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8482854ce6eb49178ce34a9915880308da2d387cff50e46b256e3a201d09f454","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-13T03:38:41Z","title_canon_sha256":"b7fd0075bb834a3793f598115ec0c454caac6a2dbb8d60248af748a4af6dc0a2"},"schema_version":"1.0","source":{"id":"2502.08938","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2502.08938","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"arxiv_version","alias_value":"2502.08938v4","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.08938","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"pith_short_12","alias_value":"57EGOQ3X6J6I","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"pith_short_16","alias_value":"57EGOQ3X6J6I6JCH","created_at":"2026-05-28T01:04:26Z"},{"alias_kind":"pith_short_8","alias_value":"57EGOQ3X","created_at":"2026-05-28T01:04:26Z"}],"graph_snapshots":[{"event_id":"sha256:c515594ee647ccf0b3121df6c5512b62100496c0c39d4046e755a5f4e54e6c95","target":"graph","created_at":"2026-05-28T01:04:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2502.08938/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"In the past decade, motivated by the putative failure of naive self-play deep reinforcement learning (DRL) in adversarial imperfect-information games, researchers have developed numerous DRL algorithms based on fictitious play (FP), double oracle (DO), and counterfactual regret minimization (CFR). In light of recent results of the magnetic mirror descent algorithm, we hypothesize that simpler generic policy gradient methods like PPO are competitive with or superior to these FP-, DO-, and CFR-based DRL approaches. To facilitate the resolution of this hypothesis, we implement and release the fir","authors_text":"Alexandre Bayen, Amy Zhang, Eugene Vinitsky, Gabriele Farina, J. Zico Kolter, Max Rudolph, Nathan Lichtle, Samuel Sokota, Sobhan Mohammadpour","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-13T03:38:41Z","title":"Reevaluating Policy Gradient Methods for Imperfect-Information Games"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.08938","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f4da4fd30d0683c58e56cce0523c720e6b1e2c8094db626695e13e17eb0d521c","target":"record","created_at":"2026-05-28T01:04:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8482854ce6eb49178ce34a9915880308da2d387cff50e46b256e3a201d09f454","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-13T03:38:41Z","title_canon_sha256":"b7fd0075bb834a3793f598115ec0c454caac6a2dbb8d60248af748a4af6dc0a2"},"schema_version":"1.0","source":{"id":"2502.08938","kind":"arxiv","version":4}},"canonical_sha256":"efc8674377f27c8f24473914634b2fc4912547ce6e0622d6d850e182c2b1e0ad","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"efc8674377f27c8f24473914634b2fc4912547ce6e0622d6d850e182c2b1e0ad","first_computed_at":"2026-05-28T01:04:26.710880Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:26.710880Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9X1kf6NFgzEqVR2JY1fcxs/Ai2G1bxviUrH8/006BCuHCYbGkJFiSqRvRzNZSOcRC0rbOkdsD8elS1XgrhJNAg==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:26.711463Z","signed_message":"canonical_sha256_bytes"},"source_id":"2502.08938","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f4da4fd30d0683c58e56cce0523c720e6b1e2c8094db626695e13e17eb0d521c","sha256:c515594ee647ccf0b3121df6c5512b62100496c0c39d4046e755a5f4e54e6c95"],"state_sha256":"2ee6a19bc1454a69a0d310e26dcf97d1fb0b2e7b5971a588090bb5db7df08cfb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mv3Nwwgi+/jblaWFBbURAmAT74MJ1QMeqp93XS4oisbzUSrLup4yBYcmMmHEveSI0BsRT66RTISWxn4kyQOMBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T15:07:59.754615Z","bundle_sha256":"8a4bfea0c1a11d2d0d1a742bb87b37e13dee40731d89f254bf789a69322a154b"}}