{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:4OUAAJ6XU2S6PIXSPCVERMSLUT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d89cd4882af4201406a78278133fc3edb06b17e3f971d03d767b837f25845d1c","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-13T17:47:50Z","title_canon_sha256":"2a912bc74357639ee07d0c7e1a4eae21aac60b7831f1f2a7dfc9cbebfe8684bc"},"schema_version":"1.0","source":{"id":"2510.11683","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.11683","created_at":"2026-06-01T02:03:28Z"},{"alias_kind":"arxiv_version","alias_value":"2510.11683v3","created_at":"2026-06-01T02:03:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.11683","created_at":"2026-06-01T02:03:28Z"},{"alias_kind":"pith_short_12","alias_value":"4OUAAJ6XU2S6","created_at":"2026-06-01T02:03:28Z"},{"alias_kind":"pith_short_16","alias_value":"4OUAAJ6XU2S6PIXS","created_at":"2026-06-01T02:03:28Z"},{"alias_kind":"pith_short_8","alias_value":"4OUAAJ6X","created_at":"2026-06-01T02:03:28Z"}],"graph_snapshots":[{"event_id":"sha256:55af0e8e786120846b44bfb196969869d10d3ae2497da80b66e62aabb5b8ffd9","target":"graph","created_at":"2026-06-01T02:03:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.11683/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"A key challenge in applying reinforcement learning (RL) to diffusion large language models (dLLMs) is the intractability of their likelihood functions, which are essential for the RL objective, necessitating corresponding approximation during training. While existing methods approximate the log-likelihoods by their evidence lower bounds (ELBOs) via customized Monte Carlo (MC) sampling, they incur significant memory overhead due to the need to retain all MC samples for the gradient computation of non-linear terms in the RL objective, and thus restrict feasible sample sizes, leading to imprecise","authors_text":"Jiajie Zhang, Juanzi Li, Lei Hou, Nianyi Lin","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-13T17:47:50Z","title":"Boundary-Guided Policy Optimization for Memory-efficient RL of Diffusion Large Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.11683","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:eab2e20563c3c85867c6f63865a5f568f0ae433b378adba80684d2d3701d3cb6","target":"record","created_at":"2026-06-01T02:03:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d89cd4882af4201406a78278133fc3edb06b17e3f971d03d767b837f25845d1c","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-13T17:47:50Z","title_canon_sha256":"2a912bc74357639ee07d0c7e1a4eae21aac60b7831f1f2a7dfc9cbebfe8684bc"},"schema_version":"1.0","source":{"id":"2510.11683","kind":"arxiv","version":3}},"canonical_sha256":"e3a80027d7a6a5e7a2f278aa48b24ba4db9184fe45ac59ba24beee6ab62b2021","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e3a80027d7a6a5e7a2f278aa48b24ba4db9184fe45ac59ba24beee6ab62b2021","first_computed_at":"2026-06-01T02:03:28.221594Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T02:03:28.221594Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"IeHKqEcgwJTyKZJ2J61ykYoEDG3/CXORDbgcrYfh8Xvm5AmK9UzJDmnwFfgGLZN9uxAvPHJ89FRm3nUqOrVNBg==","signature_status":"signed_v1","signed_at":"2026-06-01T02:03:28.222719Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.11683","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:eab2e20563c3c85867c6f63865a5f568f0ae433b378adba80684d2d3701d3cb6","sha256:55af0e8e786120846b44bfb196969869d10d3ae2497da80b66e62aabb5b8ffd9"],"state_sha256":"3590acf35a4024bea4b63c87a5ffd4d19a5cdbab79b85f2699ceebaf52382335"}