{"total":15,"items":[{"citing_arxiv_id":"2605.12466","ref_index":39,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Solve the Loop: Attractor Models for Language and Reasoning","primary_cat":"cs.LG","submitted_at":"2026-05-12T17:51:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Attractor Models solve for fixed points in transformer embeddings using implicit differentiation to enable stable iterative refinement, delivering better perplexity, accuracy, and efficiency than standard or looped transformers.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11011","ref_index":54,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"LoopUS: Recasting Pretrained LLMs into Looped Latent Refinement Models","primary_cat":"cs.LG","submitted_at":"2026-05-10T11:05:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LoopUS converts pretrained LLMs into looped latent refinement models via block decomposition, selective gating, random deep supervision, and confidence-based early exiting to improve reasoning performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06856","ref_index":58,"ref_count":2,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Benchmarked Yet Not Measured -- Generative AI Should be Evaluated Against Real-World Utility","primary_cat":"cs.LG","submitted_at":"2026-05-07T18:56:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Generative AI evaluation must shift from static benchmark scores to measuring sustained improvements in human capabilities within specific deployment contexts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06207","ref_index":51,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Taming the Entropy Cliff: Variable Codebook Size Quantization for Autoregressive Visual Generation","primary_cat":"cs.CV","submitted_at":"2026-05-07T13:13:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Variable codebook sizes that increase along the sequence in visual tokenizers reduce generation FID scores significantly for autoregressive models on ImageNet.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23434","ref_index":21,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"When Does Removing LayerNorm Help? Activation Bounding as a Regime-Dependent Implicit Regularizer","primary_cat":"cs.LG","submitted_at":"2026-04-25T20:12:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DyT improves validation loss 27% at 64M params/1M tokens but worsens it 19% at 118M tokens, with saturation levels predicting the sign of the effect.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21106","ref_index":43,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"How Much Is One Recurrence Worth? Iso-Depth Scaling Laws for Looped Language Models","primary_cat":"cs.LG","submitted_at":"2026-04-22T21:51:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A fitted iso-depth scaling law measures that one recurrence in looped transformers is worth r^0.46 unique blocks in validation loss.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21100","ref_index":72,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Preconditioned DeltaNet: Curvature-aware Sequence Modeling for Linear Recurrences","primary_cat":"cs.LG","submitted_at":"2026-04-22T21:38:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Preconditioned delta-rule models with a diagonal curvature approximation improve upon standard DeltaNet, GDN, and KDA by better approximating the test-time regression objective.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15180","ref_index":9,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AdaSplash-2: Faster Differentiable Sparse Attention","primary_cat":"cs.LG","submitted_at":"2026-04-16T16:03:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AdaSplash-2 introduces a histogram-based initialization for the α-entmax normalizer that cuts iterations to 1-2 and, with a sparsity-aware GPU kernel, matches or beats FlashAttention-2 training speed at moderate-to-high sparsity while delivering long-context gains.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.11080","ref_index":7,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"ReSpinQuant: Efficient Layer-Wise LLM Quantization via Subspace Residual Rotation Approximation","primary_cat":"cs.CV","submitted_at":"2026-04-13T07:00:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"ReSpinQuant achieves state-of-the-art accuracy in W4A4 and W3A3 LLM quantization by using efficient residual subspace rotation approximations that match layer-wise performance while retaining the inference speed of global rotation methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09083","ref_index":51,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"EdgeFlow: Fast Cold Starts for LLMs on Mobile Devices","primary_cat":"cs.OS","submitted_at":"2026-04-10T08:09:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"EdgeFlow reduces mobile LLM cold-start latency up to 4.07x versus llama.cpp, MNN, and llm.npu by NPU-aware adaptive quantization, SIMD-friendly packing, and synergistic granular CPU-NPU pipelining at comparable accuracy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08118","ref_index":22,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Initialisation Determines the Basin: Efficient Codebook Optimisation for Extreme LLM Quantization","primary_cat":"cs.CL","submitted_at":"2026-04-09T11:38:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Output-aware EM initialization for codebooks in additive quantization avoids poor optimization basins and yields better 2-bit compressed LLMs across Llama and Qwen models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2501.00663","ref_index":81,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Titans: Learning to Memorize at Test Time","primary_cat":"cs.LG","submitted_at":"2024-12-31T22:32:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Titans combine attention for current context with a learnable neural memory for long-term history, achieving better performance and scaling to over 2M-token contexts on language, reasoning, genomics, and time-series tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2311.12983","ref_index":129,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"GAIA: a benchmark for General AI Assistants","primary_cat":"cs.CL","submitted_at":"2023-11-21T20:34:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"GAIA benchmark shows humans at 92% accuracy on simple real-world questions far outperform current AI systems at 15%, proposing this gap as a key milestone for general AI.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2309.17453","ref_index":37,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Efficient Streaming Language Models with Attention Sinks","primary_cat":"cs.CL","submitted_at":"2023-09-29T17:59:56+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"StreamingLLM lets finite-window LLMs generalize to infinite-length sequences by retaining initial-token KV states as attention sinks, enabling stable streaming inference up to 4M tokens.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2305.10403","ref_index":107,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"PaLM 2 Technical Report","primary_cat":"cs.CL","submitted_at":"2023-05-17T17:46:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"PaLM 2 reports state-of-the-art results on language, reasoning, and multilingual tasks with improved efficiency over PaLM.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}