{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:JR3ARXSNM753VPSYFVDDHM6OXS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"551ae692d6c809564d08c0a45f38ac5c373237d2389169b78e18d36ebccdee77","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T02:25:03Z","title_canon_sha256":"950def92e8c85c4680d333ad4f444f1dfc07928123837a6ac0563168490fc963"},"schema_version":"1.0","source":{"id":"2605.30749","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30749","created_at":"2026-06-01T01:03:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30749v1","created_at":"2026-06-01T01:03:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30749","created_at":"2026-06-01T01:03:14Z"},{"alias_kind":"pith_short_12","alias_value":"JR3ARXSNM753","created_at":"2026-06-01T01:03:14Z"},{"alias_kind":"pith_short_16","alias_value":"JR3ARXSNM753VPSY","created_at":"2026-06-01T01:03:14Z"},{"alias_kind":"pith_short_8","alias_value":"JR3ARXSN","created_at":"2026-06-01T01:03:14Z"}],"graph_snapshots":[{"event_id":"sha256:3f99f4c281a34c59ae2811e6915b339aad348e39213576ccfa8af38719bb0080","target":"graph","created_at":"2026-06-01T01:03:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.30749/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Maximum entropy reinforcement learning (MaxEnt-RL) enables robust exploration, yet practical implementations often restrict policies to simple Gaussians.\n  While recent approaches incorporate expressive generative policies via importance-weighted supervised learning, they are prone to importance weight collapse, which limits their scalability in high-dimensional action spaces.\n  Our key insight is to mitigate this limitation by localizing the sampling region, avoiding the weight degeneracy induced by importance sampling over the entire action space.\n  To instantiate this insight, we introduce ","authors_text":"Daesol Cho, Gawon Lee, H. Jin Kim, Jonghae Park, Jusuk Lee, Sungha Kim","cross_cats":["cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T02:25:03Z","title":"FLAG: Flow Policy MaxEnt-RL by Latent Augmented Guidance"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30749","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:12f5243d418f69c6c9d2079023a27b7f4933b8a29de40a942563822fdad9c72e","target":"record","created_at":"2026-06-01T01:03:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"551ae692d6c809564d08c0a45f38ac5c373237d2389169b78e18d36ebccdee77","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T02:25:03Z","title_canon_sha256":"950def92e8c85c4680d333ad4f444f1dfc07928123837a6ac0563168490fc963"},"schema_version":"1.0","source":{"id":"2605.30749","kind":"arxiv","version":1}},"canonical_sha256":"4c7608de4d67fbbabe582d4633b3cebc93528361682179b7dfca11cfa7e45563","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4c7608de4d67fbbabe582d4633b3cebc93528361682179b7dfca11cfa7e45563","first_computed_at":"2026-06-01T01:03:14.111889Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:03:14.111889Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"osN7fQQeaFW7CPwrJ0N0BnI8dTcA76SsilAWmMQpBRxImQoXryxItZ+FODNvmgxpGdkJ9pTyRw1wiJ0qVLmYAQ==","signature_status":"signed_v1","signed_at":"2026-06-01T01:03:14.112728Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.30749","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:12f5243d418f69c6c9d2079023a27b7f4933b8a29de40a942563822fdad9c72e","sha256:3f99f4c281a34c59ae2811e6915b339aad348e39213576ccfa8af38719bb0080"],"state_sha256":"fa5837abb730a563d24542f2a7d8ec491caf7fc40dd4d921ec5c3034fd9b34a8"}