{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:SEP75JXLRXT3XDO5HTX4PIJG27","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bc45f0a40a171e2f35b57a64f0d2735ae8c24041fb1cf3a365b3fcb5832103e2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T05:28:36Z","title_canon_sha256":"03e8c3ea111064a5d4bf0454e17ec9df74e80e0a0db16e10e41384a2a472f4b9"},"schema_version":"1.0","source":{"id":"2605.30857","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30857","created_at":"2026-06-01T01:03:21Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30857v1","created_at":"2026-06-01T01:03:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30857","created_at":"2026-06-01T01:03:21Z"},{"alias_kind":"pith_short_12","alias_value":"SEP75JXLRXT3","created_at":"2026-06-01T01:03:21Z"},{"alias_kind":"pith_short_16","alias_value":"SEP75JXLRXT3XDO5","created_at":"2026-06-01T01:03:21Z"},{"alias_kind":"pith_short_8","alias_value":"SEP75JXL","created_at":"2026-06-01T01:03:21Z"}],"graph_snapshots":[{"event_id":"sha256:a826248ba4d9070a14333c181e5e35f05e8e09db0329ecfeeca5018583204396","target":"graph","created_at":"2026-06-01T01:03:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.30857/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Instruction fine-tuning is employed to enhance the instruction-following ability of large language models (LLMs). As the amount of instruction fine-tuning data increases, selecting the optimal core set becomes particularly important. However, ensuring the diversity of the core set remains a significant challenge. Existing methods predominantly distinguish different training data based on the text features themselves, decoupled from LLMs' own understanding and representation of the data. To address this issue, we propose a Model-Aware Diverse Core Set Selection method, which distinguishes data ","authors_text":"Jiao Xue, Pengjie Ren, Wenhao Zhang, Yao Chen, Yi Bai, Zhumin Chen","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T05:28:36Z","title":"MADS: Model-Aware Diverse Core Set Selection for Instruction Tuning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30857","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f86aa3553fe9e06dd7015d98f4dfe86387c0c08d7281980d5c7c07930b4357e0","target":"record","created_at":"2026-06-01T01:03:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bc45f0a40a171e2f35b57a64f0d2735ae8c24041fb1cf3a365b3fcb5832103e2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T05:28:36Z","title_canon_sha256":"03e8c3ea111064a5d4bf0454e17ec9df74e80e0a0db16e10e41384a2a472f4b9"},"schema_version":"1.0","source":{"id":"2605.30857","kind":"arxiv","version":1}},"canonical_sha256":"911ffea6eb8de7bb8ddd3cefc7a126d7fbf6958221c217f5d432b863e82d3af2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"911ffea6eb8de7bb8ddd3cefc7a126d7fbf6958221c217f5d432b863e82d3af2","first_computed_at":"2026-06-01T01:03:21.342985Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:03:21.342985Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QvnaHHstj3dFRYVrBR5k0PqpSj3VyuvF0I53U4X9z5wX8RZEvHayrk8eyOWQfQ0D/sGEgdAjo5/XnC/piaN9DA==","signature_status":"signed_v1","signed_at":"2026-06-01T01:03:21.343806Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.30857","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f86aa3553fe9e06dd7015d98f4dfe86387c0c08d7281980d5c7c07930b4357e0","sha256:a826248ba4d9070a14333c181e5e35f05e8e09db0329ecfeeca5018583204396"],"state_sha256":"f5ada8a2be2fef221b3caebad1f2ce5dc0e86941fee8ed2d43c596d2a5e72faa"}