{"total":12,"items":[{"citing_arxiv_id":"2605.11868","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"IPI-proxy: An Intercepting Proxy for Red-Teaming Web-Browsing AI Agents Against Indirect Prompt Injection","primary_cat":"cs.CR","submitted_at":"2026-05-12T09:48:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"IPI-proxy is a toolkit using an intercepting proxy to inject indirect prompt injection attacks into live web pages for testing AI browsing agents against hidden instructions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07269","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MIPIAD: Multilingual Indirect Prompt Injection Attack Defense with Qwen -- TF-IDF Hybrid and Meta-Ensemble Learning","primary_cat":"cs.CL","submitted_at":"2026-05-08T05:34:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"MIPIAD reports a hybrid Qwen-TF-IDF ensemble defense that reaches F1 0.9205 and reduces the English-Bangla performance gap on a 1.43-million-sample synthetic benchmark derived from BIPIA templates.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08258","ref_index":24,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Designing Intelligent Enterprise Agents: A Capability-Aligned Multi-Agent Architecture","primary_cat":"cs.MA","submitted_at":"2026-05-07T21:42:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CEAD architecture for intelligent enterprise agents achieves 70.6% safe success rate on 10,000 tasks by making agent design the primary abstraction rather than governance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02236","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Perturbation Dose Responses in Recursive LLM Loops: Raw Switching, Stochastic Floors, and Persistent Escape under Append, Replace, and Dialog Updates","primary_cat":"cs.AI","submitted_at":"2026-05-04T05:16:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"In 30-step recursive LLM loops, append-mode persistent escape from source basins reaches 50% near 400 tokens under full history but plateaus below 50% under tail-clip memory policy, while replace-mode switching largely reflects state reset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.25109","ref_index":25,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Structured Security Auditing and Robustness Enhancement for Untrusted Agent Skills","primary_cat":"cs.CR","submitted_at":"2026-04-28T01:32:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SkillGuard-Robust formulates pre-load auditing of untrusted Agent Skills as a three-way classification task and achieves 97.30% exact match and 98.33% malicious-risk recall on held-out benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23887","ref_index":11,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Evaluation of Prompt Injection Defenses in Large Language Models","primary_cat":"cs.CR","submitted_at":"2026-04-26T21:22:35+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23374","ref_index":36,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Ghost in the Agent: Redefining Information Flow Tracking for LLM Agents","primary_cat":"cs.CR","submitted_at":"2026-04-25T16:39:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"NeuroTaint is the first taint tracking framework for LLM agents that uses offline auditing of semantic, causal, and persistent context to detect flows from untrusted sources to privileged sinks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19657","ref_index":82,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"An AI Agent Execution Environment to Safeguard User Data","primary_cat":"cs.CR","submitted_at":"2026-04-21T16:45:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GAAP guarantees confidentiality of private user data for AI agents by enforcing user-specified permissions deterministically through persistent information flow tracking, without trusting the agent or requiring attack-free models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09443","ref_index":31,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Many-Tier Instruction Hierarchy in LLM Agents","primary_cat":"cs.CL","submitted_at":"2026-04-10T16:00:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ManyIH and ManyIH-Bench address instruction conflicts in LLM agents with up to 12 privilege levels across 853 tasks, revealing frontier models achieve only ~40% accuracy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2410.02644","ref_index":157,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Agent Security Bench (ASB): Formalizing and Benchmarking Attacks and Defenses in LLM-based Agents","primary_cat":"cs.CR","submitted_at":"2024-10-03T16:30:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ASB is a new benchmark that tests 10 prompt injection attacks, memory poisoning, a novel Plan-of-Thought backdoor attack, and 11 defenses on LLM agents across 13 models, finding attack success rates up to 84.3% and limited defense effectiveness.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.13352","ref_index":70,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"AgentDojo: A Dynamic Environment to Evaluate Prompt Injection Attacks and Defenses for LLM Agents","primary_cat":"cs.CR","submitted_at":"2024-06-19T08:55:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"AgentDojo introduces an extensible evaluation framework populated with realistic agent tasks and security test cases to measure prompt injection robustness in tool-using LLM agents.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2404.13208","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions","primary_cat":"cs.CR","submitted_at":"2024-04-19T22:55:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Training LLMs on data that enforces priority levels for instructions makes models robust to prompt injection attacks, including unseen ones, with little loss on standard tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}