{"total":12,"items":[{"citing_arxiv_id":"2605.10933","ref_index":101,"ref_count":3,"confidence":0.35,"is_internal_anchor":false,"paper_title":"DECO: Sparse Mixture-of-Experts with Dense-Comparable Performance on End-Side Devices","primary_cat":"cs.LG","submitted_at":"2026-05-11T17:58:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DECO is a sparse MoE architecture with ReLU-based routing, learnable expert scaling, and NormSiLU activation that matches dense Transformer performance at 20% expert activation and delivers 2.93x speedup on Jetson AGX Orin.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17753","ref_index":25,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Evolutionary Negative Module Pruning for Better LoRA Merging","primary_cat":"cs.AI","submitted_at":"2026-04-20T03:13:18+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ENMP prunes negative LoRA modules via evolutionary search to boost merging performance to new state-of-the-art levels across language and vision tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2502.10248","ref_index":175,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Step-Video-T2V Technical Report: The Practice, Challenges, and Future of Video Foundation Model","primary_cat":"cs.CV","submitted_at":"2025-02-14T15:58:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Step-Video-T2V describes a 30B-parameter text-to-video model with custom Video-VAE, 3D DiT, flow matching, and Video-DPO that claims state-of-the-art results on a new internal benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2408.07199","ref_index":134,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Agent Q: Advanced Reasoning and Learning for Autonomous AI Agents","primary_cat":"cs.AI","submitted_at":"2024-08-13T20:52:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Agent Q integrates MCTS-guided search, self-critique, and off-policy DPO to train LLM agents that outperform behavior cloning and reinforced fine-tuning baselines in WebShop and achieve up to 95.4% success in real-world booking scenarios.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2408.00724","ref_index":265,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Inference Scaling Laws: An Empirical Analysis of Compute-Optimal Inference for Problem-Solving with Language Models","primary_cat":"cs.AI","submitted_at":"2024-08-01T17:16:04+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Empirical analysis shows scaling inference compute via strategies like tree search can be more efficient than scaling model parameters, with 7B models plus novel search outperforming 34B models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.10162","ref_index":280,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Sycophancy to Subterfuge: Investigating Reward-Tampering in Large Language Models","primary_cat":"cs.AI","submitted_at":"2024-06-14T16:26:20+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LLMs trained on simple specification gaming generalize to zero-shot reward tampering including rewriting their own reward function.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.08464","ref_index":75,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Magpie: Alignment Data Synthesis from Scratch by Prompting Aligned LLMs with Nothing","primary_cat":"cs.CL","submitted_at":"2024-06-12T17:52:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Magpie synthesizes 300K high-quality alignment instructions from Llama-3-Instruct via auto-regressive prompting on partial templates, enabling fine-tuned models to match official instruct performance on AlpacaEval, ArenaHard, and WildBench.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2402.19173","ref_index":11,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"StarCoder 2 and The Stack v2: The Next Generation","primary_cat":"cs.SE","submitted_at":"2024-02-29T13:53:35+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"StarCoder2-15B matches or beats CodeLlama-34B on code tasks despite being smaller, and StarCoder2-3B outperforms prior 15B models, with open weights and exact training data identifiers released.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2402.11411","ref_index":51,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Aligning Modalities in Vision Large Language Models via Preference Fine-tuning","primary_cat":"cs.LG","submitted_at":"2024-02-18T00:56:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"POVID generates AI-created preference data to fine-tune vision-language models with DPO, reducing hallucinations and improving benchmark scores.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2310.11511","ref_index":80,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection","primary_cat":"cs.CL","submitted_at":"2023-10-17T18:18:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Self-RAG trains LLMs to adaptively retrieve passages on demand and self-critique using reflection tokens, outperforming ChatGPT and retrieval-augmented Llama2 on QA, reasoning, and fact verification.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2304.05128","ref_index":11,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Teaching Large Language Models to Self-Debug","primary_cat":"cs.CL","submitted_at":"2023-04-11T10:43:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Self-Debugging teaches LLMs to identify and fix their own code errors through rubber-duck-style natural language explanations and execution feedback, delivering 2-12% gains over baselines on Spider, TransCoder, and MBPP.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2301.12652","ref_index":208,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"REPLUG: Retrieval-Augmented Black-Box Language Models","primary_cat":"cs.CL","submitted_at":"2023-01-30T04:18:09+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"REPLUG improves frozen black-box LMs by prepending LM-supervised retrieved documents, delivering 6.3% better language modeling on GPT-3 and 5.1% better five-shot MMLU on Codex.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}