{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:JEXKL4CBATHAE3MKQGMZPTG5L2","short_pith_number":"pith:JEXKL4CB","canonical_record":{"source":{"id":"2606.18307","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T07:21:49Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a49c653bb18a6e2f7444259973289940c2eacd4379c4ee7786770f903af2a2f1","abstract_canon_sha256":"c4c975b61043e6f110644f48b193b3c5ef88fe414b13a54ec62437619ae45571"},"schema_version":"1.0"},"canonical_sha256":"492ea5f04104ce026d8a819997ccdd5e9b277d3a8003f3fc8c4343ab8a3bd482","source":{"kind":"arxiv","id":"2606.18307","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.18307","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"arxiv_version","alias_value":"2606.18307v1","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.18307","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_12","alias_value":"JEXKL4CBATHA","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_16","alias_value":"JEXKL4CBATHAE3MK","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_8","alias_value":"JEXKL4CB","created_at":"2026-06-19T16:10:57Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:JEXKL4CBATHAE3MKQGMZPTG5L2","target":"record","payload":{"canonical_record":{"source":{"id":"2606.18307","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T07:21:49Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a49c653bb18a6e2f7444259973289940c2eacd4379c4ee7786770f903af2a2f1","abstract_canon_sha256":"c4c975b61043e6f110644f48b193b3c5ef88fe414b13a54ec62437619ae45571"},"schema_version":"1.0"},"canonical_sha256":"492ea5f04104ce026d8a819997ccdd5e9b277d3a8003f3fc8c4343ab8a3bd482","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:57.514846Z","signature_b64":"jD0OwISYBLu7eSrcspJLAna21vuW+Pi4GbI1acYLHaUVu8v/AZFWV1F3wKD0hzkiJ7w5TFEIGYwPyslBs9UkBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"492ea5f04104ce026d8a819997ccdd5e9b277d3a8003f3fc8c4343ab8a3bd482","last_reissued_at":"2026-06-19T16:10:57.514511Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:57.514511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.18307","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:10:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6N5ZVuCon/D4D2e6zGP/RnFqrGHsPgZzlndY70L5JX1lH/T0RwD7N0MYXxor4MTDOv5Sk0CRqekQjS2uhFiDDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T16:19:32.187936Z"},"content_sha256":"583903873dcafc610448fae148da31dcf3ddd528860721c36723dbeb9d7b8f3f","schema_version":"1.0","event_id":"sha256:583903873dcafc610448fae148da31dcf3ddd528860721c36723dbeb9d7b8f3f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:JEXKL4CBATHAE3MKQGMZPTG5L2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"DRIFT: Refining Instruction Data via On-Policy Data Attribution","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Lincheng Li, Tianyu Yu, Yuan Yao, Zefan Wang","submitted_at":"2026-06-16T07:21:49Z","abstract_excerpt":"Optimizing the training data distribution for Supervised Fine-Tuning (SFT) dictates the capability of Large Language Models (LLMs). While existing data curation methods excel at accelerating training under constrained budgets, they are less suited to elevating the capability upper bound. The challenge here is no longer to identify a smaller subset that preserves performance, but to refine the data distribution toward instances most capable of improving the final model. To address this problem, we explore instance-level data attribution using Influence Functions (IF). We identify that standard "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.18307","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.18307/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:10:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"J2rEN+rDHqdBsngTPUnmzUevAExabnAjsWjVW2SQ2hBhzak1+jOFU1hKFfV62+brNSnCL1jOkvm4FqBouHcEAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T16:19:32.188288Z"},"content_sha256":"45efe505b3c9d249ca5e625f88fb3ae0a70d9d04aa45a3497ab7fdb979258a1d","schema_version":"1.0","event_id":"sha256:45efe505b3c9d249ca5e625f88fb3ae0a70d9d04aa45a3497ab7fdb979258a1d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/JEXKL4CBATHAE3MKQGMZPTG5L2/bundle.json","state_url":"https://pith.science/pith/JEXKL4CBATHAE3MKQGMZPTG5L2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/JEXKL4CBATHAE3MKQGMZPTG5L2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T16:19:32Z","links":{"resolver":"https://pith.science/pith/JEXKL4CBATHAE3MKQGMZPTG5L2","bundle":"https://pith.science/pith/JEXKL4CBATHAE3MKQGMZPTG5L2/bundle.json","state":"https://pith.science/pith/JEXKL4CBATHAE3MKQGMZPTG5L2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/JEXKL4CBATHAE3MKQGMZPTG5L2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:JEXKL4CBATHAE3MKQGMZPTG5L2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c4c975b61043e6f110644f48b193b3c5ef88fe414b13a54ec62437619ae45571","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T07:21:49Z","title_canon_sha256":"a49c653bb18a6e2f7444259973289940c2eacd4379c4ee7786770f903af2a2f1"},"schema_version":"1.0","source":{"id":"2606.18307","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.18307","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"arxiv_version","alias_value":"2606.18307v1","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.18307","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_12","alias_value":"JEXKL4CBATHA","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_16","alias_value":"JEXKL4CBATHAE3MK","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_8","alias_value":"JEXKL4CB","created_at":"2026-06-19T16:10:57Z"}],"graph_snapshots":[{"event_id":"sha256:45efe505b3c9d249ca5e625f88fb3ae0a70d9d04aa45a3497ab7fdb979258a1d","target":"graph","created_at":"2026-06-19T16:10:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.18307/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Optimizing the training data distribution for Supervised Fine-Tuning (SFT) dictates the capability of Large Language Models (LLMs). While existing data curation methods excel at accelerating training under constrained budgets, they are less suited to elevating the capability upper bound. The challenge here is no longer to identify a smaller subset that preserves performance, but to refine the data distribution toward instances most capable of improving the final model. To address this problem, we explore instance-level data attribution using Influence Functions (IF). We identify that standard ","authors_text":"Lincheng Li, Tianyu Yu, Yuan Yao, Zefan Wang","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T07:21:49Z","title":"DRIFT: Refining Instruction Data via On-Policy Data Attribution"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.18307","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:583903873dcafc610448fae148da31dcf3ddd528860721c36723dbeb9d7b8f3f","target":"record","created_at":"2026-06-19T16:10:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c4c975b61043e6f110644f48b193b3c5ef88fe414b13a54ec62437619ae45571","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T07:21:49Z","title_canon_sha256":"a49c653bb18a6e2f7444259973289940c2eacd4379c4ee7786770f903af2a2f1"},"schema_version":"1.0","source":{"id":"2606.18307","kind":"arxiv","version":1}},"canonical_sha256":"492ea5f04104ce026d8a819997ccdd5e9b277d3a8003f3fc8c4343ab8a3bd482","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"492ea5f04104ce026d8a819997ccdd5e9b277d3a8003f3fc8c4343ab8a3bd482","first_computed_at":"2026-06-19T16:10:57.514511Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:10:57.514511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"jD0OwISYBLu7eSrcspJLAna21vuW+Pi4GbI1acYLHaUVu8v/AZFWV1F3wKD0hzkiJ7w5TFEIGYwPyslBs9UkBw==","signature_status":"signed_v1","signed_at":"2026-06-19T16:10:57.514846Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.18307","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:583903873dcafc610448fae148da31dcf3ddd528860721c36723dbeb9d7b8f3f","sha256:45efe505b3c9d249ca5e625f88fb3ae0a70d9d04aa45a3497ab7fdb979258a1d"],"state_sha256":"45f2de81c49b98dadfd10664f995005c80c128268dbc334822fa77ce10cf5656"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LlZj23C5QyKDfPA5zpQtQXAsM9LBOuvVP9JaK+8q6RlC0fCgE8ntohbwzLlQSpC+nTC0bC+QRIq9ewXLhr/hAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T16:19:32.190182Z","bundle_sha256":"aed1671bd29346e3b53b0ed87d0f0b627e8f97b8b6fa2b2c153327f60d8a3fdd"}}