{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:RI56Q7VI6KGT64HXSK7TLNDKFL","short_pith_number":"pith:RI56Q7VI","schema_version":"1.0","canonical_sha256":"8a3be87ea8f28d3f70f792bf35b46a2af8672918d66d221056d17f75cc920fb2","source":{"kind":"arxiv","id":"2606.23271","version":1},"attestation_state":"computed","paper":{"title":"Scaling LLM Knowledge Boundaries via Distribution-Optimized Synthesis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chengtao Gan, Enpei Niu, Huajun Chen, Jingdian Lu, Jun Zhou, Lei Liang, Liangyurui Liu, Menghao Qian, Songze Li, Wen Zhang, Xiangjin Hu, Xiaoke Guo, Yarong Lan, Yuanxiang Liu, Yuan Yuan, Zhaoyan Gong, Zhaoyang Wang, Zhiqiang Liu, Zhongpu Bo","submitted_at":"2026-06-22T12:50:00Z","abstract_excerpt":"Knowledge injection via synthetic data is crucial for enhancing Large Language Models (LLMs). However, current synthesis methods simply stop at preset token counts or fixed data ratios, lacking awareness of knowledge distribution. This results in some domains being sparse while others are redundant, limiting LLM knowledge boundaries. We revisit knowledge injection from a distribution perspective and hypothesize that an optimal knowledge distribution exists to maximize knowledge boundary expansion. We propose KDoS (Knowledge Distribution-optimized Synthesis), a framework that introduces knowled"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.23271","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-22T12:50:00Z","cross_cats_sorted":[],"title_canon_sha256":"5216427425dcb0690b3a65cdacadda5502df32a8b0f3d51e67e5fa8c668ef078","abstract_canon_sha256":"b9ff1bb6735cc264e21db9f16a0740e648ca4c338c26e8d5163957cbb701c875"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:14:15.424179Z","signature_b64":"CpH3YZTxpm27TgSPoKKrrA8mAF62R+HmtpIXjTRa3etap0Q/zAFDZ2XOXpL22KQ+HpjSXLy9SQK2IeENs3mKAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8a3be87ea8f28d3f70f792bf35b46a2af8672918d66d221056d17f75cc920fb2","last_reissued_at":"2026-06-23T03:14:15.423679Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:14:15.423679Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Scaling LLM Knowledge Boundaries via Distribution-Optimized Synthesis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chengtao Gan, Enpei Niu, Huajun Chen, Jingdian Lu, Jun Zhou, Lei Liang, Liangyurui Liu, Menghao Qian, Songze Li, Wen Zhang, Xiangjin Hu, Xiaoke Guo, Yarong Lan, Yuanxiang Liu, Yuan Yuan, Zhaoyan Gong, Zhaoyang Wang, Zhiqiang Liu, Zhongpu Bo","submitted_at":"2026-06-22T12:50:00Z","abstract_excerpt":"Knowledge injection via synthetic data is crucial for enhancing Large Language Models (LLMs). However, current synthesis methods simply stop at preset token counts or fixed data ratios, lacking awareness of knowledge distribution. This results in some domains being sparse while others are redundant, limiting LLM knowledge boundaries. We revisit knowledge injection from a distribution perspective and hypothesize that an optimal knowledge distribution exists to maximize knowledge boundary expansion. We propose KDoS (Knowledge Distribution-optimized Synthesis), a framework that introduces knowled"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.23271","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.23271/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.23271","created_at":"2026-06-23T03:14:15.423742+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.23271v1","created_at":"2026-06-23T03:14:15.423742+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.23271","created_at":"2026-06-23T03:14:15.423742+00:00"},{"alias_kind":"pith_short_12","alias_value":"RI56Q7VI6KGT","created_at":"2026-06-23T03:14:15.423742+00:00"},{"alias_kind":"pith_short_16","alias_value":"RI56Q7VI6KGT64HX","created_at":"2026-06-23T03:14:15.423742+00:00"},{"alias_kind":"pith_short_8","alias_value":"RI56Q7VI","created_at":"2026-06-23T03:14:15.423742+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL","json":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL.json","graph_json":"https://pith.science/api/pith-number/RI56Q7VI6KGT64HXSK7TLNDKFL/graph.json","events_json":"https://pith.science/api/pith-number/RI56Q7VI6KGT64HXSK7TLNDKFL/events.json","paper":"https://pith.science/paper/RI56Q7VI"},"agent_actions":{"view_html":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL","download_json":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL.json","view_paper":"https://pith.science/paper/RI56Q7VI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.23271&json=true","fetch_graph":"https://pith.science/api/pith-number/RI56Q7VI6KGT64HXSK7TLNDKFL/graph.json","fetch_events":"https://pith.science/api/pith-number/RI56Q7VI6KGT64HXSK7TLNDKFL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL/action/storage_attestation","attest_author":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL/action/author_attestation","sign_citation":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL/action/citation_signature","submit_replication":"https://pith.science/pith/RI56Q7VI6KGT64HXSK7TLNDKFL/action/replication_record"}},"created_at":"2026-06-23T03:14:15.423742+00:00","updated_at":"2026-06-23T03:14:15.423742+00:00"}