{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3BA52NFJTLFMVFHQMNWVXCSHM7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3cc8104b2bcef539135760261910acc0a86b6b14c964edbe32a4f5ef35eb8242","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-07-01T15:40:12Z","title_canon_sha256":"7bec57620db6c4e71ff39a1b7d597965b98a6e7611740ceb43f52e0f42f9f606"},"schema_version":"1.0","source":{"id":"2607.01083","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2607.01083","created_at":"2026-07-02T01:18:28Z"},{"alias_kind":"arxiv_version","alias_value":"2607.01083v1","created_at":"2026-07-02T01:18:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.01083","created_at":"2026-07-02T01:18:28Z"},{"alias_kind":"pith_short_12","alias_value":"3BA52NFJTLFM","created_at":"2026-07-02T01:18:28Z"},{"alias_kind":"pith_short_16","alias_value":"3BA52NFJTLFMVFHQ","created_at":"2026-07-02T01:18:28Z"},{"alias_kind":"pith_short_8","alias_value":"3BA52NFJ","created_at":"2026-07-02T01:18:28Z"}],"graph_snapshots":[{"event_id":"sha256:a9809987db02fef2cfa218df18d9d926d1da97dede444c2670f6822045392785","target":"graph","created_at":"2026-07-02T01:18:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2607.01083/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"High-throughput RLHF systems often decouple rollout generation from policy optimization, leading to the use of stale rollouts during learner updates. In this work, we study the effect of such staleness in asynchronous GRPO. We make the behavior policy explicit in the GRPO surrogate objective and distinguish between the surrogate-gradient mapping used by the learner and the true total derivative of a distribution-dependent population objective. Under assumptions of local boundedness, distributional smoothness, and behavior-policy smoothness, we show that stale rollouts introduce a per-step surr","authors_text":"Bill Shi, Chengke Bao, Chuan Wu, Haofeng Xu, Jie Xiao, Jingwei Shi, Jingwei Song, Linfeng Zhang, Pengbin Feng, Weixun Wang, Yuhang Han","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-07-01T15:40:12Z","title":"Staleness-Learning Rate Scaling Laws for Asynchronous RLHF"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.01083","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0cb8ffd25e1c8c60cb6de03fef81c0f21c4a2d9d9637c3a6af9b52fdfdec56d7","target":"record","created_at":"2026-07-02T01:18:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3cc8104b2bcef539135760261910acc0a86b6b14c964edbe32a4f5ef35eb8242","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-07-01T15:40:12Z","title_canon_sha256":"7bec57620db6c4e71ff39a1b7d597965b98a6e7611740ceb43f52e0f42f9f606"},"schema_version":"1.0","source":{"id":"2607.01083","kind":"arxiv","version":1}},"canonical_sha256":"d841dd34a99acaca94f0636d5b8a4767e6a0d0546b317553de6c9d86e9291c39","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d841dd34a99acaca94f0636d5b8a4767e6a0d0546b317553de6c9d86e9291c39","first_computed_at":"2026-07-02T01:18:28.252062Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-02T01:18:28.252062Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"w8gLt8gUQ/6BIRJNQgM4h+y28TON5hqGnbqCHAhaztBYjGV+cHKkCsxN/gfCYFUDXJGJC4sQf9FYRGkVVd5LCw==","signature_status":"signed_v1","signed_at":"2026-07-02T01:18:28.252501Z","signed_message":"canonical_sha256_bytes"},"source_id":"2607.01083","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0cb8ffd25e1c8c60cb6de03fef81c0f21c4a2d9d9637c3a6af9b52fdfdec56d7","sha256:a9809987db02fef2cfa218df18d9d926d1da97dede444c2670f6822045392785"],"state_sha256":"eb44db014ecd83921290240c642eaf91722ebb0f1897a01a291a103796d8d4de"}