{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:EDZTKGEMYFOSLTX34ET5KQD3RS","short_pith_number":"pith:EDZTKGEM","schema_version":"1.0","canonical_sha256":"20f335188cc15d25cefbe127d5407b8c8d2fd5b0737bf292e831be38d23afcf3","source":{"kind":"arxiv","id":"2606.20659","version":1},"attestation_state":"computed","paper":{"title":"Skill Coverage: A Test Adequacy Metric for Agent Skills","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG","cs.SE"],"primary_cat":"cs.AI","authors_text":"Boyin Tan, Xiaowei Huang, Youcheng Sun","submitted_at":"2026-06-09T10:16:05Z","abstract_excerpt":"Agent skills encode reusable procedural knowledge that guides large language model agents across tasks and execution contexts. Existing evaluations primarily assess skills through task level outcomes, yet task success alone does not reveal which parts of a skill have been exercised or which remain untested. We introduce skill coverage, a test adequacy metric that treats the skill artifact as the object under test. Our approach extracts observable skill behavior constraints from skill documents and measures whether an agent trajectory provides sufficient evidence to exercise and evaluate each c"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.20659","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-09T10:16:05Z","cross_cats_sorted":["cs.LG","cs.SE"],"title_canon_sha256":"d09c2809e53c0dd753fa2052c0ec5199e4504a61724d0cdd766f04bdaff7c31d","abstract_canon_sha256":"fb2a7af113dcc0a36db3e6f53e634fc02e1c65e922faceee5275afab8cbb0959"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T00:11:52.823229Z","signature_b64":"lUz0WghWEEzAIdt3OdseGJAU8MXT0+bendS73dQdjAM0RsxGZGOPGM8SgofZrvCDKpApIwpupyVlY1EQv40HBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"20f335188cc15d25cefbe127d5407b8c8d2fd5b0737bf292e831be38d23afcf3","last_reissued_at":"2026-06-23T00:11:52.822846Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T00:11:52.822846Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Skill Coverage: A Test Adequacy Metric for Agent Skills","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG","cs.SE"],"primary_cat":"cs.AI","authors_text":"Boyin Tan, Xiaowei Huang, Youcheng Sun","submitted_at":"2026-06-09T10:16:05Z","abstract_excerpt":"Agent skills encode reusable procedural knowledge that guides large language model agents across tasks and execution contexts. Existing evaluations primarily assess skills through task level outcomes, yet task success alone does not reveal which parts of a skill have been exercised or which remain untested. We introduce skill coverage, a test adequacy metric that treats the skill artifact as the object under test. Our approach extracts observable skill behavior constraints from skill documents and measures whether an agent trajectory provides sufficient evidence to exercise and evaluate each c"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.20659","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.20659/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.20659","created_at":"2026-06-23T00:11:52.822908+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.20659v1","created_at":"2026-06-23T00:11:52.822908+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.20659","created_at":"2026-06-23T00:11:52.822908+00:00"},{"alias_kind":"pith_short_12","alias_value":"EDZTKGEMYFOS","created_at":"2026-06-23T00:11:52.822908+00:00"},{"alias_kind":"pith_short_16","alias_value":"EDZTKGEMYFOSLTX3","created_at":"2026-06-23T00:11:52.822908+00:00"},{"alias_kind":"pith_short_8","alias_value":"EDZTKGEM","created_at":"2026-06-23T00:11:52.822908+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS","json":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS.json","graph_json":"https://pith.science/api/pith-number/EDZTKGEMYFOSLTX34ET5KQD3RS/graph.json","events_json":"https://pith.science/api/pith-number/EDZTKGEMYFOSLTX34ET5KQD3RS/events.json","paper":"https://pith.science/paper/EDZTKGEM"},"agent_actions":{"view_html":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS","download_json":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS.json","view_paper":"https://pith.science/paper/EDZTKGEM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.20659&json=true","fetch_graph":"https://pith.science/api/pith-number/EDZTKGEMYFOSLTX34ET5KQD3RS/graph.json","fetch_events":"https://pith.science/api/pith-number/EDZTKGEMYFOSLTX34ET5KQD3RS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS/action/storage_attestation","attest_author":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS/action/author_attestation","sign_citation":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS/action/citation_signature","submit_replication":"https://pith.science/pith/EDZTKGEMYFOSLTX34ET5KQD3RS/action/replication_record"}},"created_at":"2026-06-23T00:11:52.822908+00:00","updated_at":"2026-06-23T00:11:52.822908+00:00"}