{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:FFHGT2J7SND6CGD4XPPIK4PGBL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"353cb427cf6d72134290f1ef38b92e289c43a2964e525df6ddb9e34bf18a28f4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T06:34:17Z","title_canon_sha256":"9ef007e266e57d7c377d4a76ee0b22ff1161ea4cd50c5a699c63b2726b2eb669"},"schema_version":"1.0","source":{"id":"2605.28028","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.28028","created_at":"2026-05-28T01:04:56Z"},{"alias_kind":"arxiv_version","alias_value":"2605.28028v1","created_at":"2026-05-28T01:04:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.28028","created_at":"2026-05-28T01:04:56Z"},{"alias_kind":"pith_short_12","alias_value":"FFHGT2J7SND6","created_at":"2026-05-28T01:04:56Z"},{"alias_kind":"pith_short_16","alias_value":"FFHGT2J7SND6CGD4","created_at":"2026-05-28T01:04:56Z"},{"alias_kind":"pith_short_8","alias_value":"FFHGT2J7","created_at":"2026-05-28T01:04:56Z"}],"graph_snapshots":[{"event_id":"sha256:d2ba6345d120867bb7fa3569909cd5f1e25615f0cf673894adfe79b7e057154d","target":"graph","created_at":"2026-05-28T01:04:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.28028/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Group Relative Policy Optimization (GRPO) is widely used for training reasoning models, but updating all sampled completions in each group incurs substantial cost and can reinforce verbose reasoning trajectories. In this paper, we study whether all completions provide equally useful update signals in GRPO-style reasoning RL. Our gradient-similarity analysis shows that, within the same prompt group, same-class completions often induce highly similar update directions, whereas correct-incorrect pairs provide more distinct contrastive signals. Motivated by this observation, we propose Binary Pref","authors_text":"Huan Song, Jiawei Shao, Qingfei Zhao, Shuyu Tian, Xuelong Li","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T06:34:17Z","title":"BPPO: Binary Prefix Policy Optimization for Efficient GRPO-Style Reasoning RL with Concise Responses"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.28028","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6ec47df6161eef330956bec1c269487f3108ca2be3b10f66282a027ae2b59c4d","target":"record","created_at":"2026-05-28T01:04:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"353cb427cf6d72134290f1ef38b92e289c43a2964e525df6ddb9e34bf18a28f4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-27T06:34:17Z","title_canon_sha256":"9ef007e266e57d7c377d4a76ee0b22ff1161ea4cd50c5a699c63b2726b2eb669"},"schema_version":"1.0","source":{"id":"2605.28028","kind":"arxiv","version":1}},"canonical_sha256":"294e69e93f9347e1187cbbde8571e60ac7951cd3d3ceb9d6120336f05c3a2378","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"294e69e93f9347e1187cbbde8571e60ac7951cd3d3ceb9d6120336f05c3a2378","first_computed_at":"2026-05-28T01:04:56.425298Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:56.425298Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FPIF5iltDbXCGfGZC2NeMzLc9BgsNvbLsVUmr9czIFc/Bc8IN/mCLTPQD2L81zyv805S+RSWgctHoAwyaRY2Aw==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:56.425718Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.28028","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6ec47df6161eef330956bec1c269487f3108ca2be3b10f66282a027ae2b59c4d","sha256:d2ba6345d120867bb7fa3569909cd5f1e25615f0cf673894adfe79b7e057154d"],"state_sha256":"b043211d3d5508103b80d9932258a99421caa345462ad1cead6bd20c52c03c4d"}