{"total":85,"items":[{"citing_arxiv_id":"2605.13527","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MMSkills: Towards Multimodal Skills for General Visual Agents","primary_cat":"cs.AI","submitted_at":"2026-05-13T13:40:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MMSkills turns public interaction trajectories into compact multimodal skill packages that visual agents can consult at runtime to improve decision-making on benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12694","ref_index":28,"ref_count":1,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Agentic Interpretation: Lattice-Structured Evidence for LLM-Based Program Analysis","primary_cat":"cs.SE","submitted_at":"2026-05-12T19:46:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Agentic interpretation uses lattices to track LLM judgments on decomposed program claims during analysis.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12618","ref_index":50,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Career Mobility of Planning Alumni in the United States: Evidence from Professional Profile Data using Large Language Models","primary_cat":"cs.CY","submitted_at":"2026-05-12T18:06:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Analysis of LinkedIn profiles shows planning alumni with multisector or lateral career trajectories achieve significantly higher upward mobility.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12321","ref_index":42,"ref_count":1,"confidence":0.5,"is_internal_anchor":true,"paper_title":"LISA: Cognitive Arbitration for Signal-Free Autonomous Intersection Management","primary_cat":"cs.AI","submitted_at":"2026-05-12T16:04:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LISA applies LLMs as primary decision-makers for signal-free intersection management, cutting mean control delay by up to 89.1% and maintaining better service levels than fixed-cycle, SCATS, AIM, or GLOSA baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12055","ref_index":12,"ref_count":1,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Do Language Models Encode Knowledge of Linguistic Constraint Violations?","primary_cat":"cs.CL","submitted_at":"2026-05-12T12:37:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Sparse autoencoder analysis of language model activations finds limited evidence for a unified set of features detecting linguistic constraint violations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11753","ref_index":198,"ref_count":1,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Towards Visually Grounded Multimodal Summarization via Cross-Modal Transformer and Gated 
Attention","primary_cat":"cs.AI","submitted_at":"2026-05-12T08:28:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SPeCTrA-Sum uses hierarchical cross-modal fusion via DVP and DPP-distilled image selection via VRP to generate more accurate and visually grounded multimodal summaries.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11693","ref_index":199,"ref_count":1,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Measuring What Matters Beyond Text: Evaluating Multimodal Summaries by Quality, Alignment, and Diversity","primary_cat":"cs.AI","submitted_at":"2026-05-12T07:50:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MM-Eval unifies evaluation of multimodal summaries by integrating factual text quality, cross-modal relevance via MLLM judge, and visual diversity via truncated CLIP entropy, then calibrates their combination on human preferences.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11442","ref_index":40,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Can a Single Message Paralyze the AI Infrastructure? The Rise of AbO-DDoS Attacks through Targeted Mobius Injection","primary_cat":"cs.CR","submitted_at":"2026-05-12T02:51:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Mobius Injection exploits semantic closure in LLM agents to enable single-message AbO-DDoS attacks achieving up to 51x call amplification and 229x latency inflation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11206","ref_index":152,"ref_count":2,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Instructions Shape Production of Language, not Processing","primary_cat":"cs.CL","submitted_at":"2026-05-11T20:21:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Instructions trigger a production-centered mechanism in language models, with task-specific information stable in input tokens but varying strongly in output tokens and correlating with behavior.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11188","ref_index":22,"ref_count":1,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Adversarial SQL Injection Generation with LLM-Based Architectures","primary_cat":"cs.CR","submitted_at":"2026-05-11T19:52:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RADAGAS-GPT4o achieves a 22.73% bypass rate against 10 WAFs, succeeding more against AI/ML-based firewalls than rule-based ones.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11163","ref_index":35,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Benchmarking LLM-Based Static Analysis for Secure Smart Contract Development: Reliability, Limitations, and Potential Hybrid 
Solutions","primary_cat":"cs.CR","submitted_at":"2026-05-11T19:10:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LLMs for smart contract security analysis show lexical bias from identifier names causing high false positives, with prompting creating precision-recall trade-offs, positioning them as complements rather than replacements for static analysis tools.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10870","ref_index":23,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Remember the Decision, Not the Description: A Rate-Distortion Framework for Agent Memory","primary_cat":"cs.AI","submitted_at":"2026-05-11T17:20:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Memory for long-horizon agents should preserve distinctions that affect decisions under a fixed budget, not descriptive features, yielding an exact forgetting boundary and a new online learner DeMem with regret guarantees.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10516","ref_index":4,"ref_count":1,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Consistency as a Testable Property: Statistical Methods to Evaluate AI Agent Reliability","primary_cat":"cs.AI","submitted_at":"2026-05-11T13:06:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A framework with U-statistics and kernel-based metrics quantifies AI agent consistency and robustness, showing trajectory metrics outperform pass@1 rates in diagnosing failures.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10481","ref_index":23,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Safe Multi-Agent Behavior Must Be Maintained, Not Merely Asserted: Constraint Drift in LLM-Based Multi-Agent Systems","primary_cat":"cs.MA","submitted_at":"2026-05-11T12:43:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Safety constraints in LLM-based multi-agent systems commonly weaken during execution through memory, communication, and tool use, requiring them to be maintained as explicit state rather than asserted once.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10415","ref_index":17,"ref_count":2,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Aligning LLM Uncertainty with Human Disagreement in Subjectivity Analysis","primary_cat":"cs.CL","submitted_at":"2026-05-11T11:52:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DPUA is a two-phase framework that aligns LLM uncertainty expressions with human disagreement distributions in subjectivity analysis while preserving task performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08716","ref_index":18,"ref_count":1,"confidence":0.5,"is_internal_anchor":true,"paper_title":"Bias by Necessity: Impossibility Theorems for Sequential Processing with Convergent AI and Human 
Validation","primary_cat":"cs.AI","submitted_at":"2026-05-09T05:56:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Primacy, anchoring, and order-dependence are architecturally necessary in autoregressive models due to causal masking constraints, with supporting evidence from theorems, LLM fits, and human experiments.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08580","ref_index":14,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Slipstream: Trajectory-Grounded Compaction Validation for Long-Horizon Agents","primary_cat":"cs.MA","submitted_at":"2026-05-09T00:47:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Slipstream uses asynchronous compaction with trajectory-grounded judge validation to improve long-horizon agent accuracy by up to 8.8 percentage points and reduce latency by up to 39.7%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07301","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SOM: Structured Opponent Modeling for LLM-based Agents via Structural Causal Model","primary_cat":"cs.AI","submitted_at":"2026-05-08T06:11:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"SOM uses a Structural Causal Model to create an explicit graph of opponent observation-to-action links, allowing LLMs to reason along those paths for more accurate and stable predictions in multi-agent settings.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07127","ref_index":14,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"The Position Curse: LLMs Struggle to Locate the Last Few Items in a List","primary_cat":"cs.LG","submitted_at":"2026-05-08T02:04:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLMs exhibit the Position Curse, with backward position retrieval in lists lagging far behind forward retrieval, showing only partial gains from PosBench fine-tuning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06640","ref_index":4,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"Concept-Based Abductive and Contrastive Explanations for Behaviors of Vision Models","primary_cat":"cs.LG","submitted_at":"2026-05-07T17:51:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Concept-based abductive and contrastive explanations find minimal high-level concepts that causally determine vision model outcomes on individual images or groups sharing a specified behavior.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05476","ref_index":35,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Unified Benchmark for Evaluating Knowledge Graph Construction Methods and Graph Neural 
Networks","primary_cat":"cs.LG","submitted_at":"2026-05-06T21:53:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A dual-purpose benchmark supplies two text-derived knowledge graphs and one expert reference graph on the same biomedical corpus to jointly measure construction method quality and GNN robustness via semi-supervised node classification.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08212","ref_index":22,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LLMs with in-context learning for Algorithmic Theoretical Physics","primary_cat":"cs.LG","submitted_at":"2026-05-06T09:30:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Frontier LLMs with in-context learning and CAS integration solve most algorithmic tasks in theoretical physics when supplied with worked examples.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04496","ref_index":75,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"SCOUT: Active Information Foraging for Long-Text Understanding with Decoupled Epistemic States","primary_cat":"cs.CL","submitted_at":"2026-05-06T04:55:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SCOUT achieves state-of-the-art long-text understanding with up to 8x lower token use by actively foraging for sparse query-relevant information and updating a compact provenance-grounded epistemic state.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04428","ref_index":24,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Submodular Ground-Set Pruning: Monotone Tightness and a Non-Monotone Separation","primary_cat":"cs.DS","submitted_at":"2026-05-06T02:43:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"For monotone submodular maximization, containment pruning has a tight 1-1/e factor; for non-monotone objectives, 1/2-ε algorithms exist that exceed known optimization hardness bounds.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04425","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Joint Semantic Token Selection and Prompt Optimization for Interpretable Prompt Learning","primary_cat":"cs.CV","submitted_at":"2026-05-06T02:38:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"IPL alternates discrete semantic token selection using approximate submodular optimization with continuous prompt optimization to boost both interpretability and task performance in vision-language model adaptation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05245","ref_index":23,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"AdaGATE: Adaptive Gap-Aware Token-Efficient Evidence Assembly for Multi-Hop Retrieval-Augmented 
Generation","primary_cat":"cs.CL","submitted_at":"2026-05-04T14:45:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AdaGATE improves evidence F1 scores on HotpotQA for multi-hop RAG under clean, redundant, and noisy conditions by framing selection as gap-aware token-constrained repair, outperforming baselines while using 2.6x fewer tokens.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02608","ref_index":59,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"Dependency Parsing Across the Resource Spectrum: Evaluating Architectures on High and Low-Resource Languages","primary_cat":"cs.CL","submitted_at":"2026-05-04T13:55:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Biaffine LSTM outperforms transformer parsers like AfroXLMR and RemBERT in low-resource dependency parsing, with transformers gaining advantage as data increases and morphological complexity as a secondary predictor.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01858","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Decouple and Cache: KV Cache Construction for Streaming Video Understanding","primary_cat":"cs.CV","submitted_at":"2026-05-03T13:02:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DSCache decouples cumulative past and instant KV caches with position-agnostic encoding to adapt offline VideoVLLMs to streaming video, delivering 2.5% average accuracy gains on QA benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01342","ref_index":14,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Don't Be a Pot Stirrer! 
Authorized Vector Data Retrieval via Access-Aware Indexing","primary_cat":"cs.DB","submitted_at":"2026-05-02T09:26:04+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Veda and EffVeda partition vectors into disjoint role-combination blocks, apply lattice-based copy and merge operations within a storage budget, index large nodes with HNSW, and use coordinated search with distance bounds to deliver higher throughput at high recall.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01199","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"Focus and Dilution: The Multi-stage Learning Process of Attention","primary_cat":"cs.LG","submitted_at":"2026-05-02T02:30:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"In one-layer Transformers trained on Markovian data, attention undergoes a cycle of rapid rank-one condensation, frequency-driven focus on high-frequency tokens, dilution via embedding perturbations, and restart from low-frequency asymmetries.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00702","ref_index":51,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Learning How and What to Memorize: Cognition-Inspired Two-Stage Optimization for Evolving Memory","primary_cat":"cs.CL","submitted_at":"2026-05-01T14:45:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MemCoE learns memory organization guidelines via contrastive feedback and then trains a guideline-aligned RL policy for memory updates, yielding consistent gains on personalization benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00398","ref_index":261,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"M-CaStLe: Uncovering Local Causal Structures in Multivariate Space-Time Gridded Data","primary_cat":"cs.LG","submitted_at":"2026-05-01T04:40:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"M-CaStLe generalizes local stencil-based causal discovery to the multivariate case and decomposes resulting graphs into reaction and spatial components for interpretation in space-time gridded data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00318","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Structure-Aware Chunking for Tabular Data in Retrieval-Augmented Generation","primary_cat":"cs.CL","submitted_at":"2026-05-01T00:57:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"STC reduces tabular chunk counts by up to 56% versus baselines and raises hybrid MRR to 0.5945 and BM25 Recall@1 to 0.754 by preserving row structure during chunking.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27906","ref_index":8,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"From Unstructured Recall to Schema-Grounded Memory: Reliable AI Memory via Iterative, Schema-Aware 
Extraction","primary_cat":"cs.AI","submitted_at":"2026-04-30T14:14:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Schema-aware iterative extraction turns AI memory into a verified system of record, reaching 90-97% accuracy on extraction and end-to-end memory benchmarks where retrieval baselines score 80-87%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27306","ref_index":27,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"NuggetIndex: Governed Atomic Retrieval for Maintainable RAG","primary_cat":"cs.IR","submitted_at":"2026-04-30T01:33:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"NuggetIndex manages atomic nuggets with temporal validity and lifecycle metadata to filter outdated information before ranking, yielding 42% higher nugget recall, 9pp better temporal correctness, and 55% fewer conflicts than passage or unmanaged proposition baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26622","ref_index":12,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"OCR-Memory: Optical Context Retrieval for Long-Horizon Agent Memory","primary_cat":"cs.CL","submitted_at":"2026-04-29T12:49:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"OCR-Memory encodes agent trajectories as images with visual anchors and retrieves verbatim text via locate-and-transcribe, yielding gains on long-horizon benchmarks under strict context limits.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26525","ref_index":33,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PRAG: End-to-End Privacy-Preserving Retrieval-Augmented Generation","primary_cat":"cs.CR","submitted_at":"2026-04-29T10:46:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PRAG delivers end-to-end private RAG with 72-74% recall via non-interactive homomorphic approximations, interactive client assistance, and operation-error estimation to preserve ranking quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23371","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"When Context Sticks: Studying Interference in In-Context Learning","primary_cat":"cs.LG","submitted_at":"2026-04-25T16:35:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"In-context learning shows persistent interference from prior examples, with more misleading linear examples degrading quadratic predictions and training curricula modulating recovery speed.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23057","ref_index":5,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"Don't Make the LLM Read the Graph: Make the Graph Think","primary_cat":"cs.AI","submitted_at":"2026-04-24T22:56:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Belief 
graphs improve LLM 2nd-order theory-of-mind performance in Hanabi when they gate action selection (100% vs 20%) but are mostly decorative as prompt context for strong models, with model-family-specific planner defiance and diminishing returns from deeper graphs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22442","ref_index":24,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"HubRouter: A Pluggable Sub-Quadratic Routing Primitive for Hybrid Sequence Models","primary_cat":"cs.LG","submitted_at":"2026-04-24T10:59:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"HubRouter is a sub-quadratic routing primitive using learned hubs that replaces attention layers in hybrid models while delivering competitive perplexity and large throughput gains.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22128","ref_index":2,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"Dissociating Decodability and Causal Use in Bracket-Sequence Transformers","primary_cat":"cs.CL","submitted_at":"2026-04-24T00:26:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"In Dyck-language transformers, attention patterns causally use top-of-stack information while residual-stream depth and distance signals are decodable yet causally inert.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21889","ref_index":8,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"TingIS: Real-time Risk Event Discovery from Noisy Customer Incidents at Enterprise Scale","primary_cat":"cs.CL","submitted_at":"2026-04-23T17:40:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"TingIS uses multi-stage LLM event linking plus routing and filtering to extract high-priority incidents from noisy customer data at 2,000 messages per minute, delivering 3.5-minute P90 latency and 95% discovery in production.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21100","ref_index":86,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"Preconditioned DeltaNet: Curvature-aware Sequence Modeling for Linear Recurrences","primary_cat":"cs.LG","submitted_at":"2026-04-22T21:38:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Preconditioned delta-rule models with a diagonal curvature approximation improve upon standard DeltaNet, GDN, and KDA by better approximating the test-time regression objective.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20727","ref_index":10,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Supplement Generation Training for Enhancing Agentic Task Performance","primary_cat":"cs.LG","submitted_at":"2026-04-22T16:12:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"SGT trains a lightweight model to generate task-specific supplemental text that improves performance of a larger frozen LLM on agentic tasks without 
modifying the large model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20943","ref_index":4,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SCM: Sleep-Consolidated Memory with Algorithmic Forgetting for Large Language Models","primary_cat":"cs.LG","submitted_at":"2026-04-22T15:47:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"SCM enables LLMs to achieve perfect recall in ten-turn conversations by using sleep-like consolidation and adaptive forgetting to reduce memory noise by over 90%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22849","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"R$^3$AG: Retriever Routing for Retrieval-Augmented Generation","primary_cat":"cs.IR","submitted_at":"2026-04-22T06:51:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"R³AG routes queries to retrievers by decomposing capabilities into retrieval quality and generation utility, trained via contrastive learning on document assessments and downstream answer correctness to outperform static methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20911","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"Omission Constraints Decay While Commission Constraints Persist in Long-Context LLM Agents","primary_cat":"cs.CR","submitted_at":"2026-04-22T01:25:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Omission constraints in LLM agents decay with conversation length while commission constraints remain stable, creating an invisible security failure.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19656","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":true,"paper_title":"Pause or Fabricate? 
Training Language Models for Grounded Reasoning","primary_cat":"cs.CL","submitted_at":"2026-04-21T16:45:29+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GRIL uses stage-specific RL rewards to train LLMs to detect missing premises, pause proactively, and resume grounded reasoning after clarification, yielding up to 45% better premise detection and 30% higher task success on insufficient math datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18835","ref_index":55,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Semantic Needles in Document Haystacks: Sensitivity Testing of LLM-as-a-Judge Similarity Scoring","primary_cat":"cs.CL","submitted_at":"2026-04-20T20:59:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LLMs exhibit positional bias and context-dependent scoring patterns when judging document similarity, with each model showing a stable scoring fingerprint but a shared hierarchy of sensitivity to different semantic perturbations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18309","ref_index":16,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"From Program Slices to Causal Clarity: Evaluating Faithful, Actionable LLM-Generated Failure Explanations via Context Partitioning and LLM-as-a-Judge","primary_cat":"cs.SE","submitted_at":"2026-04-20T14:16:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Focused, failure-specific contexts such as program slices produce more causal and actionable LLM bug explanations than large undifferentiated contexts, and higher-quality explanations correlate with better downstream repair success rates.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}
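
Below is a minimal consumer sketch for this paginated payload, assuming a hypothetical HTTP endpoint; the URL and the use of `limit`/`offset` as query parameters are assumptions, and only the response fields echoed above ("total", "items", "limit", "offset", per-item keys) are taken from the recorded data.

# Pagination sketch for the citation-listing payload above (Python).
# ASSUMPTION: BASE_URL and its query parameters are hypothetical; the
# response shape ({"total", "items", "limit", "offset"}) mirrors the payload.
import requests

BASE_URL = "https://example.invalid/api/citations"  # hypothetical endpoint

def iter_citing_papers(page_size: int = 50):
    """Yield every item by advancing `offset` until `total` is exhausted."""
    offset = 0
    while True:
        resp = requests.get(
            BASE_URL,
            params={"limit": page_size, "offset": offset},
            timeout=30,
        )
        resp.raise_for_status()
        page = resp.json()
        yield from page["items"]
        offset += len(page["items"])
        # Stop once all `total` records are consumed or a page comes back empty.
        if not page["items"] or offset >= page["total"]:
            break

# Example: collect high-confidence anchors that already carry a verdict,
# such as the two CONDITIONAL entries in the page above.
verdicted = [
    p for p in iter_citing_papers()
    if p["is_internal_anchor"] and p["confidence"] >= 0.88
    and p["verdict"] != "UNVERDICTED"
]

With total=85 and limit=50, the loop above would fetch two pages (offsets 0 and 50) before terminating.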