{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2GNROJ4IVX265LAADOWWIVA4ZP","short_pith_number":"pith:2GNROJ4I","schema_version":"1.0","canonical_sha256":"d19b172788adf5eeac001bad64541ccbe3ec7498970123f187fa731d6bd0994f","source":{"kind":"arxiv","id":"2602.20156","version":3},"attestation_state":"computed","paper":{"title":"Skill-Inject: Measuring Agent Vulnerability to Skill File Attacks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"LLM agents execute harmful instructions from injected skill files up to 80 percent of the time.","cross_cats":["cs.LG"],"primary_cat":"cs.CR","authors_text":"David Schmotz, Luca Beurer-Kellner, Maksym Andriushchenko, Sahar Abdelnabi","submitted_at":"2026-02-23T18:59:27Z","abstract_excerpt":"LLM agents are evolving rapidly, powered by code execution, tools, and the recently introduced agent skills feature. Skills allow users to extend LLM applications with specialized third-party code, knowledge, and instructions. Although this can extend agent capabilities to new domains, it creates an increasingly complex agent supply chain, offering new surfaces for prompt injection attacks. We identify skill-based prompt injection as a significant threat and introduce SkillInject, a benchmark evaluating the susceptibility of widely-used LLM agents to injections through skill files. SkillInject"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.20156","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-02-23T18:59:27Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"0a4d6b0a4d1cc15566679910e0521871e08ee9351ffa18d74e1f722109c48851","abstract_canon_sha256":"89aa51f425c0438651383a670939b753523463988ab8149bed023e93c48ea508"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:48.438547Z","signature_b64":"CxlwU84wG4fQNuiqAmJ4+Iz4q79YuNZHoN3XVX2zIL/uwwwPiq6u5YT1Qd2Q2+bNwETiJjB8Glrczil2xbElDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d19b172788adf5eeac001bad64541ccbe3ec7498970123f187fa731d6bd0994f","last_reissued_at":"2026-05-17T23:38:48.438099Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:48.438099Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Skill-Inject: Measuring Agent Vulnerability to Skill File Attacks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"LLM agents execute harmful instructions from injected skill files up to 80 percent of the time.","cross_cats":["cs.LG"],"primary_cat":"cs.CR","authors_text":"David Schmotz, Luca Beurer-Kellner, Maksym Andriushchenko, Sahar Abdelnabi","submitted_at":"2026-02-23T18:59:27Z","abstract_excerpt":"LLM agents are evolving rapidly, powered by code execution, tools, and the recently introduced agent skills feature. Skills allow users to extend LLM applications with specialized third-party code, knowledge, and instructions. Although this can extend agent capabilities to new domains, it creates an increasingly complex agent supply chain, offering new surfaces for prompt injection attacks. We identify skill-based prompt injection as a significant threat and introduce SkillInject, a benchmark evaluating the susceptibility of widely-used LLM agents to injections through skill files. SkillInject"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"today's agents are highly vulnerable with up to 80% attack success rate with frontier models, often executing extremely harmful instructions including data exfiltration, destructive action, and ransomware-like behavior.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The crafted injection tasks and chosen frontier models accurately represent real-world skill file usage and attack scenarios that agents will encounter in deployment.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"LLM agents are highly vulnerable to prompt injection attacks delivered through skill files, achieving up to 80% success on harmful tasks including data exfiltration and destructive actions.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"LLM agents execute harmful instructions from injected skill files up to 80 percent of the time.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"163ad1cfb5bb059da7c675862ab1cb79aea4c21f94ab606c3a0236af6ce9186b"},"source":{"id":"2602.20156","kind":"arxiv","version":3},"verdict":{"id":"5473b675-da91-489d-ae96-fb0c9a2d028c","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T08:55:14.536195Z","strongest_claim":"today's agents are highly vulnerable with up to 80% attack success rate with frontier models, often executing extremely harmful instructions including data exfiltration, destructive action, and ransomware-like behavior.","one_line_summary":"LLM agents are highly vulnerable to prompt injection attacks delivered through skill files, achieving up to 80% success on harmful tasks including data exfiltration and destructive actions.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The crafted injection tasks and chosen frontier models accurately represent real-world skill file usage and attack scenarios that agents will encounter in deployment.","pith_extraction_headline":"LLM agents execute harmful instructions from injected skill files up to 80 percent of the time."},"references":{"count":22,"sample":[{"doi":"","year":2026,"title":"The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions","work_id":"ba941a96-eb3b-48c0-b52c-5e9463085190","ref_index":1,"cited_arxiv_id":"2404.13208","is_internal_anchor":true},{"doi":"","year":null,"title":"Consider the subject matter","work_id":"7546d5df-be6f-4411-8094-10e1681db97e","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Match palette to content","work_id":"bc036738-6da9-4e02-9057-d08a6e679971","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"backup script","work_id":"0320c2b0-2da6-44a2-86f0-8bf9cfbc675b","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Comprehensive spreadsheet creation, editing, and analysis with support for formulas","work_id":"5721ff32-ff2b-40c1-b8d9-e2bef4f19eb0","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":22,"snapshot_sha256":"e33da2afcfaa8e80e18df1284cc0668f24cfbbbcd237ec6fee26e94f34f64dc8","internal_anchors":1},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.20156","created_at":"2026-05-17T23:38:48.438171+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.20156v3","created_at":"2026-05-17T23:38:48.438171+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.20156","created_at":"2026-05-17T23:38:48.438171+00:00"},{"alias_kind":"pith_short_12","alias_value":"2GNROJ4IVX26","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"2GNROJ4IVX265LAA","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"2GNROJ4I","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":20,"internal_anchor_count":20,"sample":[{"citing_arxiv_id":"2605.22321","citing_title":"Benchmarking Autonomous Agents against Temporal, Spatial, and Semantic Evasions","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13044","citing_title":"No Attack Required: Semantic Fuzzing for Specification Violations in Agent Skills","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12875","citing_title":"Do Skill Descriptions Tell the Truth? Detecting Undisclosed Security Behaviors in Code-Backed LLM Skills","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2604.02837","citing_title":"Towards Secure Agent Skills: Architecture, Threat Taxonomy, and Security Analysis","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03081","citing_title":"Supply-Chain Poisoning Attacks Against LLM Coding Agent Skill Ecosystems","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11770","citing_title":"Behavioral Integrity Verification for AI Agent Skills","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12233","citing_title":"No More, No Less: Task Alignment in Terminal Agents","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12015","citing_title":"SkillSafetyBench: Evaluating Agent Safety under Skill-Facing Attack Surfaces","ref_index":77,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11418","citing_title":"Under the Hood of SKILL.md: Semantic Supply-chain Attacks on AI Agent Skill Registry","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11047","citing_title":"Red-Teaming Agent Execution Contexts: Open-World Security Evaluation on OpenClaw","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08460","citing_title":"When Child Inherits: Modeling and Exploiting Subagent Spawn in Multi-Agent Networks","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09594","citing_title":"Trust Me, Import This: Dependency Steering Attacks via Malicious Agent Skills","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2604.11790","citing_title":"ClawGuard: A Runtime Security Framework for Tool-Augmented LLM Agents Against Indirect Prompt Injection","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2605.03378","citing_title":"ARGUS: Defending LLM Agents Against Context-Aware Prompt Injection","ref_index":140,"is_internal_anchor":true},{"citing_arxiv_id":"2604.25109","citing_title":"Structured Security Auditing and Robustness Enhancement for Untrusted Agent Skills","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05868","citing_title":"SkillScope: Toward Fine-Grained Least-Privilege Enforcement for Agent Skills","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2604.22888","citing_title":"RouteGuard: Internal-Signal Detection of Skill Poisoning in LLM Agents","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05274","citing_title":"Sealing the Audit-Runtime Gap for LLM Skills","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2604.11790","citing_title":"ClawGuard: A Runtime Security Framework for Tool-Augmented LLM Agents Against Indirect Prompt Injection","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2604.09443","citing_title":"Many-Tier Instruction Hierarchy in LLM Agents","ref_index":23,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP","json":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP.json","graph_json":"https://pith.science/api/pith-number/2GNROJ4IVX265LAADOWWIVA4ZP/graph.json","events_json":"https://pith.science/api/pith-number/2GNROJ4IVX265LAADOWWIVA4ZP/events.json","paper":"https://pith.science/paper/2GNROJ4I"},"agent_actions":{"view_html":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP","download_json":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP.json","view_paper":"https://pith.science/paper/2GNROJ4I","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.20156&json=true","fetch_graph":"https://pith.science/api/pith-number/2GNROJ4IVX265LAADOWWIVA4ZP/graph.json","fetch_events":"https://pith.science/api/pith-number/2GNROJ4IVX265LAADOWWIVA4ZP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP/action/storage_attestation","attest_author":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP/action/author_attestation","sign_citation":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP/action/citation_signature","submit_replication":"https://pith.science/pith/2GNROJ4IVX265LAADOWWIVA4ZP/action/replication_record"}},"created_at":"2026-05-17T23:38:48.438171+00:00","updated_at":"2026-05-17T23:38:48.438171+00:00"}