{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:SHMFMILY2R5C6GFPRG3QBDGXPF","short_pith_number":"pith:SHMFMILY","canonical_record":{"source":{"id":"2605.16302","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T13:33:54Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"a1a302f69d43e413ad3f992624aefaeb0012e9e2ca729481304693e919dbfb2a","abstract_canon_sha256":"7c7641de61c7ee7343100f473a5f1deb8c5ef278cd35dcb9f8dc211974ec541c"},"schema_version":"1.0"},"canonical_sha256":"91d8562178d47a2f18af89b7008cd77956206dc9923ee5786a16b951ca8d20c7","source":{"kind":"arxiv","id":"2605.16302","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.16302","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"arxiv_version","alias_value":"2605.16302v1","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16302","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"pith_short_12","alias_value":"SHMFMILY2R5C","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"pith_short_16","alias_value":"SHMFMILY2R5C6GFP","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"pith_short_8","alias_value":"SHMFMILY","created_at":"2026-05-20T00:02:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:SHMFMILY2R5C6GFPRG3QBDGXPF","target":"record","payload":{"canonical_record":{"source":{"id":"2605.16302","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T13:33:54Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"a1a302f69d43e413ad3f992624aefaeb0012e9e2ca729481304693e919dbfb2a","abstract_canon_sha256":"7c7641de61c7ee7343100f473a5f1deb8c5ef278cd35dcb9f8dc211974ec541c"},"schema_version":"1.0"},"canonical_sha256":"91d8562178d47a2f18af89b7008cd77956206dc9923ee5786a16b951ca8d20c7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:16.094573Z","signature_b64":"67rUwL3UDn6AS8fcQLhpmkHFs5xQR9W4fC/GgyqCQPaaKpJ1vyx6Db4nR4qv2j5eHCkTlWf3Uj+NkirVAVMeAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"91d8562178d47a2f18af89b7008cd77956206dc9923ee5786a16b951ca8d20c7","last_reissued_at":"2026-05-20T00:02:16.093937Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:16.093937Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.16302","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yTrHN7LCg8rZ8dtc7gBhFjMWfbWFRs2CdPlwZ4qkEdC0XNS3W2J0jpiUQFEfJP6m5zad+obNJPUooauOeubnBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T23:44:45.702947Z"},"content_sha256":"4c2aab9b1b9d1f43f0809131e9b4e75adf94a3c0b6c9ada916d2c2b16961ff15","schema_version":"1.0","event_id":"sha256:4c2aab9b1b9d1f43f0809131e9b4e75adf94a3c0b6c9ada916d2c2b16961ff15"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:SHMFMILY2R5C6GFPRG3QBDGXPF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reducing Credit Assignment Variance via Counterfactual Reasoning Paths","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Fei Ding, Guoxiong Zhou, Yeling Peng, Yongkang Zhang, youwei wang, Zijian Zeng","submitted_at":"2026-04-20T13:33:54Z","abstract_excerpt":"Reinforcement learning for multi-step reasoning with large language models (LLMs) often relies on sparse terminal rewards, leading to poor credit assignment conditions where the final feedback is evenly propagated across all intermediate decisions. This results in high gradient variance, unstable training, and numerous ineffective updates, ultimately causing the model to fail and preventing sustained improvement. We introduce a counterfactual comparison-based credit assignment framework, which samples multiple reasoning trajectories under the same input. By treating their differences as an imp"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.16302","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16302/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qlxul4xbnrHvG2ORmzRjKZTSZFp3yOY6q79gVi4+t5fLUVnJJOiuB/VHFMj5bC+jBJgSKEOB13XIK/l1zCM9AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T23:44:45.703364Z"},"content_sha256":"ce76b1d90dfe587a22d783aa4e5b90df70ca005e7582069f0cecda0e7909c13c","schema_version":"1.0","event_id":"sha256:ce76b1d90dfe587a22d783aa4e5b90df70ca005e7582069f0cecda0e7909c13c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SHMFMILY2R5C6GFPRG3QBDGXPF/bundle.json","state_url":"https://pith.science/pith/SHMFMILY2R5C6GFPRG3QBDGXPF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SHMFMILY2R5C6GFPRG3QBDGXPF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T23:44:45Z","links":{"resolver":"https://pith.science/pith/SHMFMILY2R5C6GFPRG3QBDGXPF","bundle":"https://pith.science/pith/SHMFMILY2R5C6GFPRG3QBDGXPF/bundle.json","state":"https://pith.science/pith/SHMFMILY2R5C6GFPRG3QBDGXPF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SHMFMILY2R5C6GFPRG3QBDGXPF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:SHMFMILY2R5C6GFPRG3QBDGXPF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7c7641de61c7ee7343100f473a5f1deb8c5ef278cd35dcb9f8dc211974ec541c","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T13:33:54Z","title_canon_sha256":"a1a302f69d43e413ad3f992624aefaeb0012e9e2ca729481304693e919dbfb2a"},"schema_version":"1.0","source":{"id":"2605.16302","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.16302","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"arxiv_version","alias_value":"2605.16302v1","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16302","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"pith_short_12","alias_value":"SHMFMILY2R5C","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"pith_short_16","alias_value":"SHMFMILY2R5C6GFP","created_at":"2026-05-20T00:02:16Z"},{"alias_kind":"pith_short_8","alias_value":"SHMFMILY","created_at":"2026-05-20T00:02:16Z"}],"graph_snapshots":[{"event_id":"sha256:ce76b1d90dfe587a22d783aa4e5b90df70ca005e7582069f0cecda0e7909c13c","target":"graph","created_at":"2026-05-20T00:02:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.16302/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning for multi-step reasoning with large language models (LLMs) often relies on sparse terminal rewards, leading to poor credit assignment conditions where the final feedback is evenly propagated across all intermediate decisions. This results in high gradient variance, unstable training, and numerous ineffective updates, ultimately causing the model to fail and preventing sustained improvement. We introduce a counterfactual comparison-based credit assignment framework, which samples multiple reasoning trajectories under the same input. By treating their differences as an imp","authors_text":"Fei Ding, Guoxiong Zhou, Yeling Peng, Yongkang Zhang, youwei wang, Zijian Zeng","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T13:33:54Z","title":"Reducing Credit Assignment Variance via Counterfactual Reasoning Paths"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.16302","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4c2aab9b1b9d1f43f0809131e9b4e75adf94a3c0b6c9ada916d2c2b16961ff15","target":"record","created_at":"2026-05-20T00:02:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7c7641de61c7ee7343100f473a5f1deb8c5ef278cd35dcb9f8dc211974ec541c","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-20T13:33:54Z","title_canon_sha256":"a1a302f69d43e413ad3f992624aefaeb0012e9e2ca729481304693e919dbfb2a"},"schema_version":"1.0","source":{"id":"2605.16302","kind":"arxiv","version":1}},"canonical_sha256":"91d8562178d47a2f18af89b7008cd77956206dc9923ee5786a16b951ca8d20c7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"91d8562178d47a2f18af89b7008cd77956206dc9923ee5786a16b951ca8d20c7","first_computed_at":"2026-05-20T00:02:16.093937Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:02:16.093937Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"67rUwL3UDn6AS8fcQLhpmkHFs5xQR9W4fC/GgyqCQPaaKpJ1vyx6Db4nR4qv2j5eHCkTlWf3Uj+NkirVAVMeAA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:02:16.094573Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.16302","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4c2aab9b1b9d1f43f0809131e9b4e75adf94a3c0b6c9ada916d2c2b16961ff15","sha256:ce76b1d90dfe587a22d783aa4e5b90df70ca005e7582069f0cecda0e7909c13c"],"state_sha256":"d9dca9eadf33f27449ec4ef2322015a30c5ebe29ce9b81ce6e0c80ae476c64b5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VsLC+AGN43QZy4xvK0ZlTWYy/JZVLHH7+SgRqX9gIu6v3RBV8dTAM6bhrb/FcJVVMHES/W7ibp92r2uz1VjEBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T23:44:45.705579Z","bundle_sha256":"c61730102e6fba2f668284b19704ddaf9215e8cf9d743166b8a5b4c04b3d2ad8"}}