{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:H2P7KVOFYSEWF4Z2X325ZXQS33","short_pith_number":"pith:H2P7KVOF","canonical_record":{"source":{"id":"2605.30478","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-28T18:50:00Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"b759529ad807a5319570ee11bc99de4c5e11fdf4b30ece45f5326170c0e577c1","abstract_canon_sha256":"d6599b138c41eeea9d71470927105f13f5e06a1a9982044355a6698aff56d8a8"},"schema_version":"1.0"},"canonical_sha256":"3e9ff555c5c48962f33abef5dcde12deec2a547c3c49244f8e5c5167b57216bd","source":{"kind":"arxiv","id":"2605.30478","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30478","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30478v1","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30478","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"pith_short_12","alias_value":"H2P7KVOFYSEW","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"pith_short_16","alias_value":"H2P7KVOFYSEWF4Z2","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"pith_short_8","alias_value":"H2P7KVOF","created_at":"2026-06-01T01:02:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:H2P7KVOFYSEWF4Z2X325ZXQS33","target":"record","payload":{"canonical_record":{"source":{"id":"2605.30478","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-28T18:50:00Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"b759529ad807a5319570ee11bc99de4c5e11fdf4b30ece45f5326170c0e577c1","abstract_canon_sha256":"d6599b138c41eeea9d71470927105f13f5e06a1a9982044355a6698aff56d8a8"},"schema_version":"1.0"},"canonical_sha256":"3e9ff555c5c48962f33abef5dcde12deec2a547c3c49244f8e5c5167b57216bd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T01:02:56.388805Z","signature_b64":"NRCt2scg/aLZss30t763PCoiwHwG41nMLE91ro2p3WEgKZ2uKAO6leaSsZrbMvVeOfq/k+iTI6Vc+rAeGKYBDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3e9ff555c5c48962f33abef5dcde12deec2a547c3c49244f8e5c5167b57216bd","last_reissued_at":"2026-06-01T01:02:56.387982Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T01:02:56.387982Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.30478","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:02:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FdEjM6jBfhvi3l6orp/0OCAN69ae2Qc8E32xQwh9IBWX8pF9eCymWIxCHejIxTjk+VQrnAJaJc3y7WNaf0QtAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T14:58:21.769886Z"},"content_sha256":"1400f6b11f2aa1cb5480be71a1bd2da9c07ebf69312143d8d75ec18e01348905","schema_version":"1.0","event_id":"sha256:1400f6b11f2aa1cb5480be71a1bd2da9c07ebf69312143d8d75ec18e01348905"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:H2P7KVOFYSEWF4Z2X325ZXQS33","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Improving Small Language Models for Code Generation with Reinforcement Learning from Verification Feedback","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.SE","authors_text":"Egor Skopin, Evgeny Kotelnikov","submitted_at":"2026-05-28T18:50:00Z","abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) trains language models using programmatically checkable signals such as unit-test outcomes, enabling direct optimization for functional correctness in code generation. We conduct an empirical study of RLVR for Python code generation on the MBPP benchmark using two small models (Qwen3-0.6B and Llama3.2-1B) with LoRA fine-tuning. Across multiple reward formulations such as: unit-test-only rewards, static-analysis-only shaping via the Ruff linter, and a combined reward, we compare group-based policy optimization variants (GRPO and GSPO) and ev"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30478","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.30478/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:02:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hnQJxZgvwVS3oCIVkErEPPvAjz5JOH1wX8XCBWVpkjhS8YaKSBoYcMw4+yDopasn5TIXmlMhUDOXys5C8vE6CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T14:58:21.770254Z"},"content_sha256":"0d31986f81fb2987b4d17ed60125bacb3a03ab4b2636f088531719ea66b229d4","schema_version":"1.0","event_id":"sha256:0d31986f81fb2987b4d17ed60125bacb3a03ab4b2636f088531719ea66b229d4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/H2P7KVOFYSEWF4Z2X325ZXQS33/bundle.json","state_url":"https://pith.science/pith/H2P7KVOFYSEWF4Z2X325ZXQS33/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/H2P7KVOFYSEWF4Z2X325ZXQS33/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T14:58:21Z","links":{"resolver":"https://pith.science/pith/H2P7KVOFYSEWF4Z2X325ZXQS33","bundle":"https://pith.science/pith/H2P7KVOFYSEWF4Z2X325ZXQS33/bundle.json","state":"https://pith.science/pith/H2P7KVOFYSEWF4Z2X325ZXQS33/state.json","well_known_bundle":"https://pith.science/.well-known/pith/H2P7KVOFYSEWF4Z2X325ZXQS33/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:H2P7KVOFYSEWF4Z2X325ZXQS33","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d6599b138c41eeea9d71470927105f13f5e06a1a9982044355a6698aff56d8a8","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-28T18:50:00Z","title_canon_sha256":"b759529ad807a5319570ee11bc99de4c5e11fdf4b30ece45f5326170c0e577c1"},"schema_version":"1.0","source":{"id":"2605.30478","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30478","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30478v1","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30478","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"pith_short_12","alias_value":"H2P7KVOFYSEW","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"pith_short_16","alias_value":"H2P7KVOFYSEWF4Z2","created_at":"2026-06-01T01:02:56Z"},{"alias_kind":"pith_short_8","alias_value":"H2P7KVOF","created_at":"2026-06-01T01:02:56Z"}],"graph_snapshots":[{"event_id":"sha256:0d31986f81fb2987b4d17ed60125bacb3a03ab4b2636f088531719ea66b229d4","target":"graph","created_at":"2026-06-01T01:02:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.30478/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) trains language models using programmatically checkable signals such as unit-test outcomes, enabling direct optimization for functional correctness in code generation. We conduct an empirical study of RLVR for Python code generation on the MBPP benchmark using two small models (Qwen3-0.6B and Llama3.2-1B) with LoRA fine-tuning. Across multiple reward formulations such as: unit-test-only rewards, static-analysis-only shaping via the Ruff linter, and a combined reward, we compare group-based policy optimization variants (GRPO and GSPO) and ev","authors_text":"Egor Skopin, Evgeny Kotelnikov","cross_cats":["cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-28T18:50:00Z","title":"Improving Small Language Models for Code Generation with Reinforcement Learning from Verification Feedback"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30478","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1400f6b11f2aa1cb5480be71a1bd2da9c07ebf69312143d8d75ec18e01348905","target":"record","created_at":"2026-06-01T01:02:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d6599b138c41eeea9d71470927105f13f5e06a1a9982044355a6698aff56d8a8","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-28T18:50:00Z","title_canon_sha256":"b759529ad807a5319570ee11bc99de4c5e11fdf4b30ece45f5326170c0e577c1"},"schema_version":"1.0","source":{"id":"2605.30478","kind":"arxiv","version":1}},"canonical_sha256":"3e9ff555c5c48962f33abef5dcde12deec2a547c3c49244f8e5c5167b57216bd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3e9ff555c5c48962f33abef5dcde12deec2a547c3c49244f8e5c5167b57216bd","first_computed_at":"2026-06-01T01:02:56.387982Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:02:56.387982Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"NRCt2scg/aLZss30t763PCoiwHwG41nMLE91ro2p3WEgKZ2uKAO6leaSsZrbMvVeOfq/k+iTI6Vc+rAeGKYBDA==","signature_status":"signed_v1","signed_at":"2026-06-01T01:02:56.388805Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.30478","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1400f6b11f2aa1cb5480be71a1bd2da9c07ebf69312143d8d75ec18e01348905","sha256:0d31986f81fb2987b4d17ed60125bacb3a03ab4b2636f088531719ea66b229d4"],"state_sha256":"d6fbb2ae9e2ba583d96ad85bfc96cd265befc1d8e29e11bfe7971f133bf2ac4d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Do0Yff1+6TfhyztgZWE0rIIp9PmONjKB403S+3oqUXzqZ/j074lmmmt4RBYOMzPbvFuV9n1qFOsK+DNBmk2+CQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T14:58:21.772165Z","bundle_sha256":"f7ee9ba0f466c4cfa52ac3a1f6595305e53957654db3fe4c128b73e9f042b4bc"}}