{"total":13,"items":[{"citing_arxiv_id":"2605.12382","ref_index":21,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Pretraining Exposure Explains Popularity Judgments in Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-05-12T16:45:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"LLM popularity judgments align more closely with pretraining data exposure counts than with Wikipedia popularity, with stronger effects in pairwise comparisons and larger models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12574","ref_index":22,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"DistractMIA: Black-Box Membership Inference on Vision-Language Models via Semantic Distraction","primary_cat":"cs.CV","submitted_at":"2026-05-12T12:04:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DistractMIA performs output-only black-box membership inference on vision-language models by inserting semantic distractors and measuring shifts in generated text responses.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06865","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Dataset Watermarking for Closed LLMs with Provable Detection","primary_cat":"cs.LG","submitted_at":"2026-05-07T19:06:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A new watermarking method for closed LLMs boosts random word-pair co-occurrences via rephrasing and detects the signal statistically in outputs, working reliably even when the watermarked data is only 1% of fine-tuning tokens while preserving utility.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00553","ref_index":42,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Stable-GFlowNet: Toward Diverse and Robust LLM Red-Teaming via Contrastive Trajectory Balance","primary_cat":"cs.LG","submitted_at":"2026-05-01T10:42:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Stable-GFlowNet improves training stability and attack diversity in LLM red-teaming by eliminating Z estimation via contrastive trajectory balance while preserving GFN optimality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00364","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Unlearning What Matters: Token-Level Attribution for Precise Language Model Unlearning","primary_cat":"cs.CL","submitted_at":"2026-05-01T02:59:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TokenUnlearn identifies critical tokens via masking and entropy signals then applies hard selection or soft weighting to unlearn only those tokens, yielding better forgetting and retained utility than sequence-level baselines on TOFU and WMDP.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23338","ref_index":69,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"A Systematic Survey of Security Threats and Defenses in LLM-Based AI Agents: A Layered Attack Surface Framework","primary_cat":"cs.CR","submitted_at":"2026-04-25T14:57:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A new 7x4 taxonomy organizes agentic AI security threats by architectural layer and persistence timescale, revealing under-explored upper layers and missing defenses after surveying 116 papers.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20932","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Adaptive Defense Orchestration for RAG: A Sentinel-Strategist Architecture against Multi-Vector Attacks","primary_cat":"cs.CR","submitted_at":"2026-04-22T11:17:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A context-aware Sentinel-Strategist system for RAG selectively applies defenses to block membership inference and data poisoning while recovering most retrieval utility compared to always-on defense stacks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17771","ref_index":14,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SPENCE: A Syntactic Probe for Detecting Contamination in NL2SQL Benchmarks","primary_cat":"cs.CL","submitted_at":"2026-04-20T03:50:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SPENCE shows older NL2SQL benchmarks like Spider have high performance sensitivity to syntactic changes, indicating likely training contamination, while newer ones like BIRD show little sensitivity and appear largely clean.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17396","ref_index":108,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Representation-Guided Parameter-Efficient LLM Unlearning","primary_cat":"cs.CL","submitted_at":"2026-04-19T11:59:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"REGLU guides LoRA-based unlearning via representation subspaces and orthogonal regularization to outperform prior methods on forget-retain trade-off in LLM benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.07825","ref_index":48,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Filling the Gaps: Selective Knowledge Augmentation for LLM Recommenders","primary_cat":"cs.IR","submitted_at":"2026-04-09T05:27:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"KnowSA_CKP uses comparative knowledge probing to selectively augment LLM prompts for items with knowledge gaps, improving recommendation accuracy and context efficiency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.07650","ref_index":16,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"How Independent are Large Language Models? A Statistical Framework for Auditing Behavioral Entanglement and Reweighting Verifier Ensembles","primary_cat":"cs.AI","submitted_at":"2026-04-08T23:32:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A new auditing framework reveals widespread behavioral entanglement among LLMs and shows that reweighting ensembles based on measured independence improves verification accuracy by up to 4.5%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.03199","ref_index":19,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Learning the Signature of Memorization in Autoregressive Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-03T17:17:51+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":8.0,"formal_verification":"none","one_line_summary":"A classifier trained only on transformer fine-tuning data detects an invariant memorization signature that transfers to Mamba, RWKV-4, and RecurrentGemma with AUCs of 0.963, 0.972, and 0.936.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2403.07974","ref_index":219,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","primary_cat":"cs.SE","submitted_at":"2024-03-12T17:58:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LiveCodeBench collects 400 recent contest problems to create a contamination-free benchmark evaluating LLMs on code generation and related capabilities like self-repair and execution.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}