{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:LX45CCPRPDKH5OFJYRTF2SCSQI","short_pith_number":"pith:LX45CCPR","canonical_record":{"source":{"id":"2606.08480","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-07T06:51:18Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"71e8f89f06e24e702cd2611aee92323805fac099bc9a58fe51a1c8ed16a9db8a","abstract_canon_sha256":"430b08676c5110afa775b8570addf9c22ee6470f0b9204f2707df126a30b0eb2"},"schema_version":"1.0"},"canonical_sha256":"5df9d109f178d47eb8a9c4665d4852821ed312113a9e68c9892ce6d7f2ddd90f","source":{"kind":"arxiv","id":"2606.08480","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08480","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08480v1","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08480","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"pith_short_12","alias_value":"LX45CCPRPDKH","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"pith_short_16","alias_value":"LX45CCPRPDKH5OFJ","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"pith_short_8","alias_value":"LX45CCPR","created_at":"2026-06-09T01:05:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:LX45CCPRPDKH5OFJYRTF2SCSQI","target":"record","payload":{"canonical_record":{"source":{"id":"2606.08480","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-07T06:51:18Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"71e8f89f06e24e702cd2611aee92323805fac099bc9a58fe51a1c8ed16a9db8a","abstract_canon_sha256":"430b08676c5110afa775b8570addf9c22ee6470f0b9204f2707df126a30b0eb2"},"schema_version":"1.0"},"canonical_sha256":"5df9d109f178d47eb8a9c4665d4852821ed312113a9e68c9892ce6d7f2ddd90f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:05:37.880129Z","signature_b64":"QrKYUkH2EA856MAI0iYJfzE0SSEqHVqcno+TpQIQXA2SNafl0DmBh6Dhs5oEPgIHmD6PQ/L8/8fQdvbKHewmDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5df9d109f178d47eb8a9c4665d4852821ed312113a9e68c9892ce6d7f2ddd90f","last_reissued_at":"2026-06-09T01:05:37.879708Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:05:37.879708Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.08480","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T01:05:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UO+R4pAOyJimLqNLXjJAYhAyiAtEr46YvLYFoWnPrOAB92UcD7i0BiD5lkhlfVlr8UOw6BXSCUzVEpuiL3XeCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T03:26:14.911486Z"},"content_sha256":"f2338206899b279df500a1935833a65117669772402609a218e4458408fe32d7","schema_version":"1.0","event_id":"sha256:f2338206899b279df500a1935833a65117669772402609a218e4458408fe32d7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:LX45CCPRPDKH5OFJYRTF2SCSQI","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Adaptive Loss Balancing for Noise-Robust GRPO in Generative Recommendation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.LG","authors_text":"Junbo Qi, Kewei Xu, Pengfei Zhang, Shengjie Li, Xingzhi Yao, Yanyan Zou","submitted_at":"2026-06-07T06:51:18Z","abstract_excerpt":"Reinforcement learning (RL) presents a promising avenue for enhancing generative recommendation beyond supervised imitation, leveraging reward signals to guide policy improvement. However, its efficacy is critically contingent on the trustworthiness of the reward model for the samples it evaluates. In practice, production rankers, the widely adopted reward models, are trained on exposure-biased logs, leading to sample-dependent inaccuracies that violate this assumption. Our stratified analysis uncovers a consistent pattern: reward guidance is most beneficial when the policy exhibits uncertaint"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08480","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.08480/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T01:05:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IUrafqU+f9djK2EH96Wn+vD+BIIH/p6/FRBS/Ws1qAlRLAJU8MLuKiNm3vakZpBHjaNKrYTt6DaZe2o7nzRZCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T03:26:14.911863Z"},"content_sha256":"6d593e0f4e0440dd029cdf3ba9cd97d1accd89f991782223394d8ef5b048bee4","schema_version":"1.0","event_id":"sha256:6d593e0f4e0440dd029cdf3ba9cd97d1accd89f991782223394d8ef5b048bee4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LX45CCPRPDKH5OFJYRTF2SCSQI/bundle.json","state_url":"https://pith.science/pith/LX45CCPRPDKH5OFJYRTF2SCSQI/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LX45CCPRPDKH5OFJYRTF2SCSQI/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-03T03:26:14Z","links":{"resolver":"https://pith.science/pith/LX45CCPRPDKH5OFJYRTF2SCSQI","bundle":"https://pith.science/pith/LX45CCPRPDKH5OFJYRTF2SCSQI/bundle.json","state":"https://pith.science/pith/LX45CCPRPDKH5OFJYRTF2SCSQI/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LX45CCPRPDKH5OFJYRTF2SCSQI/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:LX45CCPRPDKH5OFJYRTF2SCSQI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"430b08676c5110afa775b8570addf9c22ee6470f0b9204f2707df126a30b0eb2","cross_cats_sorted":["cs.AI","cs.IR"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-07T06:51:18Z","title_canon_sha256":"71e8f89f06e24e702cd2611aee92323805fac099bc9a58fe51a1c8ed16a9db8a"},"schema_version":"1.0","source":{"id":"2606.08480","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08480","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08480v1","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08480","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"pith_short_12","alias_value":"LX45CCPRPDKH","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"pith_short_16","alias_value":"LX45CCPRPDKH5OFJ","created_at":"2026-06-09T01:05:37Z"},{"alias_kind":"pith_short_8","alias_value":"LX45CCPR","created_at":"2026-06-09T01:05:37Z"}],"graph_snapshots":[{"event_id":"sha256:6d593e0f4e0440dd029cdf3ba9cd97d1accd89f991782223394d8ef5b048bee4","target":"graph","created_at":"2026-06-09T01:05:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.08480/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning (RL) presents a promising avenue for enhancing generative recommendation beyond supervised imitation, leveraging reward signals to guide policy improvement. However, its efficacy is critically contingent on the trustworthiness of the reward model for the samples it evaluates. In practice, production rankers, the widely adopted reward models, are trained on exposure-biased logs, leading to sample-dependent inaccuracies that violate this assumption. Our stratified analysis uncovers a consistent pattern: reward guidance is most beneficial when the policy exhibits uncertaint","authors_text":"Junbo Qi, Kewei Xu, Pengfei Zhang, Shengjie Li, Xingzhi Yao, Yanyan Zou","cross_cats":["cs.AI","cs.IR"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-07T06:51:18Z","title":"Adaptive Loss Balancing for Noise-Robust GRPO in Generative Recommendation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08480","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f2338206899b279df500a1935833a65117669772402609a218e4458408fe32d7","target":"record","created_at":"2026-06-09T01:05:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"430b08676c5110afa775b8570addf9c22ee6470f0b9204f2707df126a30b0eb2","cross_cats_sorted":["cs.AI","cs.IR"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-07T06:51:18Z","title_canon_sha256":"71e8f89f06e24e702cd2611aee92323805fac099bc9a58fe51a1c8ed16a9db8a"},"schema_version":"1.0","source":{"id":"2606.08480","kind":"arxiv","version":1}},"canonical_sha256":"5df9d109f178d47eb8a9c4665d4852821ed312113a9e68c9892ce6d7f2ddd90f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5df9d109f178d47eb8a9c4665d4852821ed312113a9e68c9892ce6d7f2ddd90f","first_computed_at":"2026-06-09T01:05:37.879708Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T01:05:37.879708Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QrKYUkH2EA856MAI0iYJfzE0SSEqHVqcno+TpQIQXA2SNafl0DmBh6Dhs5oEPgIHmD6PQ/L8/8fQdvbKHewmDQ==","signature_status":"signed_v1","signed_at":"2026-06-09T01:05:37.880129Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.08480","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f2338206899b279df500a1935833a65117669772402609a218e4458408fe32d7","sha256:6d593e0f4e0440dd029cdf3ba9cd97d1accd89f991782223394d8ef5b048bee4"],"state_sha256":"bde274171de3b226786aa1be027cc33eddac3feeef0f11d5f88ebb916c6a5ed8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ewlc7GCajH0w8NQt9k4PV0L1/ImqXWiaGJrK7v7S+jPWSxTHUzwydSr1bmEXM58AmlGMrcSF2pc5VaVWApMwCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-03T03:26:14.915815Z","bundle_sha256":"b87787c4e7422bd43eef9a6d67a01c3833626a843c41be332ccc68f77d87b972"}}