{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:GCKRMTXFY54TIY236BBCHRC5KA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"273f9a2f9c9f6f7f09bbec71c9a85917d5d5065692d31d9c7a210cf5f24d6422","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-16T06:59:52Z","title_canon_sha256":"f27c14e1194659c707d299ba216b954da90bbdfca46497d6700c04e4ab8e144b"},"schema_version":"1.0","source":{"id":"2604.14698","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.14698","created_at":"2026-06-02T01:03:47Z"},{"alias_kind":"arxiv_version","alias_value":"2604.14698v2","created_at":"2026-06-02T01:03:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.14698","created_at":"2026-06-02T01:03:47Z"},{"alias_kind":"pith_short_12","alias_value":"GCKRMTXFY54T","created_at":"2026-06-02T01:03:47Z"},{"alias_kind":"pith_short_16","alias_value":"GCKRMTXFY54TIY23","created_at":"2026-06-02T01:03:47Z"},{"alias_kind":"pith_short_8","alias_value":"GCKRMTXF","created_at":"2026-06-02T01:03:47Z"}],"graph_snapshots":[{"event_id":"sha256:c000898be3849608678feb066a2ccbf05f8084dc4b0b6c83096aec4784cc149c","target":"graph","created_at":"2026-06-02T01:03:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experiments on MuJoCo and DeepMind Control Suite benchmarks demonstrate that our method, Mean Flow Policy Optimization (MFPO), achieves performance comparable to or exceeding current diffusion-based baselines while considerably reducing training and inference time."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the two MeanFlow-specific challenges (action likelihood evaluation and soft policy improvement) can be solved without introducing new instabilities or bias that would undermine the maximum-entropy guarantees."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Mean Flow Policy Optimization (MFPO) uses few-step flow-based models for RL policies and achieves performance on par with or better than diffusion-based methods while substantially lowering training and inference time on MuJoCo and DeepMind Control Suite."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Mean Flow Policy Optimization uses few-step flow models to represent RL policies, matching diffusion performance while cutting training and inference time."}],"snapshot_sha256":"4cf8cd8f4c237ffa7f0748b52b909b2017bba487e7980dbda6c106639a1542f1"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.14698/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Diffusion models have recently emerged as expressive policy representations for online reinforcement learning (RL). However, their iterative generative processes introduce substantial training and inference overhead. To overcome this limitation, we propose to represent policies using MeanFlow models, a class of few-step flow-based generative models, to improve training and inference efficiency over diffusion-based RL approaches. To promote exploration, we optimize MeanFlow policies under the maximum entropy RL framework via soft policy iteration, and address two key challenges specific to Mean","authors_text":"Jian Cheng, Xiaoyi Dong, Xi Sheryl Zhang","cross_cats":[],"headline":"Mean Flow Policy Optimization uses few-step flow models to represent RL policies, matching diffusion performance while cutting training and inference time.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-16T06:59:52Z","title":"Mean Flow Policy Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.14698","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T11:11:28.096185Z","id":"dece871e-7229-40e7-a9a1-40326e462db2","model_set":{"reader":"grok-4.3"},"one_line_summary":"Mean Flow Policy Optimization (MFPO) uses few-step flow-based models for RL policies and achieves performance on par with or better than diffusion-based methods while substantially lowering training and inference time on MuJoCo and DeepMind Control Suite.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Mean Flow Policy Optimization uses few-step flow models to represent RL policies, matching diffusion performance while cutting training and inference time.","strongest_claim":"Experiments on MuJoCo and DeepMind Control Suite benchmarks demonstrate that our method, Mean Flow Policy Optimization (MFPO), achieves performance comparable to or exceeding current diffusion-based baselines while considerably reducing training and inference time.","weakest_assumption":"That the two MeanFlow-specific challenges (action likelihood evaluation and soft policy improvement) can be solved without introducing new instabilities or bias that would undermine the maximum-entropy guarantees."}},"verdict_id":"dece871e-7229-40e7-a9a1-40326e462db2"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dabd50a1c4642e23c1bb6de3f3044f61e691010f38c492fabe0071087369af6d","target":"record","created_at":"2026-06-02T01:03:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"273f9a2f9c9f6f7f09bbec71c9a85917d5d5065692d31d9c7a210cf5f24d6422","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-16T06:59:52Z","title_canon_sha256":"f27c14e1194659c707d299ba216b954da90bbdfca46497d6700c04e4ab8e144b"},"schema_version":"1.0","source":{"id":"2604.14698","kind":"arxiv","version":2}},"canonical_sha256":"3095164ee5c77934635bf04223c45d501ce57a3bb5430e0d8a08d69180339aa1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3095164ee5c77934635bf04223c45d501ce57a3bb5430e0d8a08d69180339aa1","first_computed_at":"2026-06-02T01:03:47.086410Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T01:03:47.086410Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mQMjBvuCUeFu3DAa4iCNs6yfUHJ5KwCPAUZG3ni3b391p9pZGUDl7MJiCnZ73an5sCmNKrugm2584FViiNqkDw==","signature_status":"signed_v1","signed_at":"2026-06-02T01:03:47.086958Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.14698","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dabd50a1c4642e23c1bb6de3f3044f61e691010f38c492fabe0071087369af6d","sha256:c000898be3849608678feb066a2ccbf05f8084dc4b0b6c83096aec4784cc149c"],"state_sha256":"dc634cda3c3aa07b7660d892fe8d163c1ef1a6ca9af4fa1a809792a1faedd672"}