{"work":{"id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","openalex_id":null,"doi":null,"arxiv_id":"2302.13971","raw_key":null,"title":"LLaMA: Open and Efficient Foundation Language Models","authors":null,"authors_text":"H","year":2023,"venue":"cs.CL","abstract":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.","external_url":"https://arxiv.org/abs/2302.13971","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-06-28T23:42:50.060415+00:00","pith_arxiv_id":"2302.13971","created_at":"2026-05-08T18:13:54.078791+00:00","updated_at":"2026-06-28T23:42:50.060415+00:00","title_quality_ok":true,"display_title":"LLaMA: Open and Efficient Foundation Language Models","render_title":"LLaMA: Open and Efficient Foundation Language Models"},"hub":{"state":{"work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","tier":"mega_hub","tier_reason":"1,000+ Pith inbound or 100,000+ external citations","pith_inbound_count":1015,"external_cited_by_count":null,"distinct_field_count":43,"first_pith_cited_at":"2023-03-15T19:31:21+00:00","last_pith_cited_at":"2026-06-25T02:25:00+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"needed","recognition_status":"needed","updated_at":"2026-06-29T00:28:06.188153+00:00","tier_text":"mega_hub"},"tier":"mega_hub","role_counts":[{"context_role":"background","n":206},{"context_role":"method","n":19},{"context_role":"baseline","n":8},{"context_role":"other","n":6},{"context_role":"dataset","n":1},{"context_role":"extension","n":1}],"polarity_counts":[{"context_polarity":"background","n":198},{"context_polarity":"use_method","n":20},{"context_polarity":"unclear","n":13},{"context_polarity":"baseline","n":7},{"context_polarity":"extend","n":1},{"context_polarity":"support","n":1},{"context_polarity":"use_dataset","n":1}],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"LLaMA: Open and Efficient Foundation Language Models","claims":[{"claim_text":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks LLaMA: Open and Efficient Foundation Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T18:13:30.893278+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[]},"error":null,"updated_at":"2026-05-13T18:13:30.891244+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-13T18:13:30.887581+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":121},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":56},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":52},{"title":"Mistral 7B","work_id":"eb5e1305-ad11-4875-ad8d-ad8b8f697599","shared_citers":51},{"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","shared_citers":50},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":49},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":48},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":43},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":43},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":40},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":37},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":36},{"title":"Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":35},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":29},{"title":"Training Compute-Optimal Large Language Models","work_id":"b2faf28d-86b7-429c-bc42-469458efc246","shared_citers":29},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":27},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":27},{"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","shared_citers":27},{"title":"OPT: Open Pre-trained Transformer Language Models","work_id":"d7ff3b21-1fff-4cf4-952a-4714e3ef2307","shared_citers":26},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":25},{"title":"Qwen2 Technical Report","work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","shared_citers":25},{"title":"A Survey of Large Language Models","work_id":"de1b42b5-4a0a-4b1f-8c78-1f7fe21be6c9","shared_citers":24},{"title":"GLU Variants Improve Transformer","work_id":"17d0763c-1016-41ab-a478-478e890765eb","shared_citers":23},{"title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism","work_id":"c888e6d1-0b1d-43d6-9ef5-f0912a0efa1b","shared_citers":23}],"time_series":[{"n":34,"year":2023},{"n":26,"year":2024},{"n":4,"year":2025},{"n":338,"year":2026}]},"error":null,"updated_at":"2026-05-13T17:25:55.795642+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"fixed":1,"items":[{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-13T18:13:30.286958+00:00"},"reader_index":{"job_type":"reader_index","status":"succeeded","result":{"note":"annotated reader requires full-text/OA fetch; shell is wired for mega hubs","status":"reader queued"},"error":null,"updated_at":"2026-06-28T18:48:13.229300+00:00"},"recognition_alignment":{"job_type":"recognition_alignment","status":"succeeded","result":{"modules":["IndisputableMonolith.Cosmology.InflationModelsFromConfigDim","IndisputableMonolith.Education.PedagogyModelsFromConfigDim","IndisputableMonolith.RRF.Models","IndisputableMonolith.Chemistry.VanDerWaals","IndisputableMonolith.Physics.GrandUnificationFromRS","IndisputableMonolith.Linguistics.PhonemeInventoryBandFromRS","IndisputableMonolith.Foundation.AlexanderDuality","IndisputableMonolith.Foundation.AlexanderDualityProof"],"query_chars":567},"error":null,"updated_at":"2026-06-28T18:48:13.227420+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"LLaMA: Open and Efficient Foundation Language Models","claims":[{"claim_text":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks LLaMA: Open and Efficient Foundation Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T18:13:30.889955+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"LLaMA: Open and Efficient Foundation Language Models","claims":[{"claim_text":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks LLaMA: Open and Efficient Foundation Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T17:25:52.718711+00:00"}},"summary":{"title":"LLaMA: Open and Efficient Foundation Language Models","claims":[{"claim_text":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks LLaMA: Open and Efficient Foundation Language Models because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":121},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":56},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":52},{"title":"Mistral 7B","work_id":"eb5e1305-ad11-4875-ad8d-ad8b8f697599","shared_citers":51},{"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","shared_citers":50},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":49},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":48},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":43},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":43},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":40},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":37},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":36},{"title":"Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":35},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":29},{"title":"Training Compute-Optimal Large Language Models","work_id":"b2faf28d-86b7-429c-bc42-469458efc246","shared_citers":29},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":27},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":27},{"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","shared_citers":27},{"title":"OPT: Open Pre-trained Transformer Language Models","work_id":"d7ff3b21-1fff-4cf4-952a-4714e3ef2307","shared_citers":26},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":25},{"title":"Qwen2 Technical Report","work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","shared_citers":25},{"title":"A Survey of Large Language Models","work_id":"de1b42b5-4a0a-4b1f-8c78-1f7fe21be6c9","shared_citers":24},{"title":"GLU Variants Improve Transformer","work_id":"17d0763c-1016-41ab-a478-478e890765eb","shared_citers":23},{"title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism","work_id":"c888e6d1-0b1d-43d6-9ef5-f0912a0efa1b","shared_citers":23}],"time_series":[{"n":34,"year":2023},{"n":26,"year":2024},{"n":4,"year":2025},{"n":338,"year":2026}]},"authors":[]},"citers":{"total":1015,"items":[{"citing_arxiv_id":"2606.26538","ref_index":35,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CascadeFormer: Depth-Tapered Transformers Motivated by Gradient Fan-in Asymmetry","primary_cat":"cs.LG","submitted_at":"2026-06-25T02:25:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CascadeFormer tapers Transformer width with depth based on gradient fan-in asymmetry to match uniform baselines in perplexity while cutting latency 8.6%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.23591","ref_index":58,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Quantifying the Agreement Between Data-Influence and Data-Similarity to Understand LLM Behavior","primary_cat":"cs.LG","submitted_at":"2026-06-22T17:00:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Data-similarity and data-influence produce significantly overlapping rankings of training documents for LLM outputs, with asymmetry allowing a favorable cost-accuracy trade-off.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18681","ref_index":31,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Moving Beyond Diversity: Visual Token Pruning as Subspace Reconstruction for Efficient VLMs","primary_cat":"cs.CV","submitted_at":"2026-06-17T04:45:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SPARE reformulates visual token pruning as column subset selection to minimize reconstruction error and uses anti-relevance for context-aware selection in VLMs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18524","ref_index":25,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"On the Residual Scaling of Looped Transformers: Stability and Transferability","primary_cat":"cs.LG","submitted_at":"2026-06-16T22:39:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Looped Transformers require residual scaling ε = 1/N due to correlated updates from weight sharing, unlike standard 1/sqrt(L), enabling learning rate transfer independent of loop count N.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.17816","ref_index":47,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Conservation Laws for Modern Neural Architectures","primary_cat":"cs.LG","submitted_at":"2026-06-16T11:44:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Unified framework characterizes conservation laws for gradient flow in feedforward networks with GELU/SiLU/SwiGLU, multihead attention with positional encodings, and MoE models under various gating.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.09131","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Late-Layer Fusion is Enough: Dual-Path Vision Token Routing for Multimodal Large Language Models under Visual Saturation","primary_cat":"cs.AI","submitted_at":"2026-06-08T07:28:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DPVR-LF routes saturated vision tokens into a one-layer side branch after layer 4, runs text-only processing through layers 5-17, and performs late fusion at the final layer to reduce visual computation while preserving multimodal performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.06947","ref_index":46,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DREAM: Dynamic Refinement of Early Assignment Mappings","primary_cat":"cs.IR","submitted_at":"2026-06-05T06:21:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DREAM proposes intent-aware tokenization, frozen-model evaluation, and dynamic beams to refine early SID assignments and improve cold-start performance in generative recommenders on Amazon benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04166","ref_index":28,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"End-to-End Text Line Detection and Ordering","primary_cat":"cs.CV","submitted_at":"2026-06-02T19:29:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Orli is an autoregressive image-to-sequence model that jointly detects text lines and determines their reading order on historical documents via chord-frame baselines, trained on 196k pages across ten scripts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03073","ref_index":12,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Efficient Hyperparameter Optimization for LLM Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-06-02T03:02:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"JF-HPO jointly adapts model size and training budget as fidelity for efficient HPO in LLM RL, reporting up to 14.9x trial speedup and performance gains of 5.8-111.6% over the VeRL recipe.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02245","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"When Knowledge Is Not Free: Cost-Aware Evidence Selection in Retrieval-Augmented Generation","primary_cat":"cs.CL","submitted_at":"2026-06-01T13:39:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Defines cost-aware RAG with evidence cost tiers and shows static selectors are brittle while agentic LLM-based selection is promising but model-dependent.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01394","ref_index":23,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"UniD$^3$: A Knowledge Graph-Enhanced RAG Framework for Drug-Disease Discovery and Reasoning","primary_cat":"cs.CL","submitted_at":"2026-05-31T18:36:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"UniD³ applies KG-RAG with Llama 3.3-70B to build six knowledge graphs and generate large validated datasets for drug-disease matching, effectiveness assessment, and target analysis from biomedical literature.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01380","ref_index":54,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Training-free image inversion for one-step diffusion models","primary_cat":"cs.CV","submitted_at":"2026-05-31T18:10:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TFinv proposes iterative noise alignment and suffix learning to enable training-free inversion and editing for one-step diffusion models, achieving SOTA performance and higher efficiency than multistep methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01155","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"When Data Is Scarce: Scaling Sparse Language Models with Repeated Training","primary_cat":"cs.LG","submitted_at":"2026-05-31T10:51:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Sparse LLMs in data-scarce multi-epoch regimes follow a scaling law based on active parameters, unique tokens, repetition count, and sparsity level that predicts performance and delays data saturation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07630","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Active Learning with Foundation Model Priors: Efficient Learning under Class Imbalance","primary_cat":"cs.LG","submitted_at":"2026-05-30T23:34:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Active learning with foundation model priors achieves over 50% annotation savings on imbalanced noisy datasets across image and text domains while maintaining performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.24894","ref_index":55,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"RWGBench: Evaluating Scholarly Positioning in Related Work Generation","primary_cat":"cs.DL","submitted_at":"2026-05-30T16:53:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"RWGBench is a citation-centric benchmark for related work generation built from 40k CS papers and a 100-paper test set, with multi-dimensional metrics that better match human expert judgment than standard similarity scores.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00746","ref_index":105,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Scaling Parallel Sequence Models to Foundation-Scale Vision Encoders","primary_cat":"cs.CV","submitted_at":"2026-05-30T14:29:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"C-GSPN scales 2D spatial propagation to foundation vision encoders via a fast CUDA kernel, compressed blocks, and two-stage distillation, matching ViT performance with 15% fewer parameters and 4x block speedup at 2K resolution.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00511","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Saliency-Aware Model Merging","primary_cat":"cs.LG","submitted_at":"2026-05-30T04:00:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SA-Merging extends SynFlow-style saliency to task vectors, adds merge-aware modulation and iterative pruning, and applies rank-wise decomposition to LoRAs, narrowing the gap to test-time adaptation on vision and language tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00494","ref_index":57,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"ProjQ: Project-and-Quantize for Adapter-Aware LLM Compression","primary_cat":"cs.LG","submitted_at":"2026-05-30T02:54:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ProjQ constrains post-training quantization noise to a low-rank manifold through orthogonal subspace projection, enabling better compensation by LoRA adapters and preserving greater model plasticity than standard PTQ.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00359","ref_index":136,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Next-Billion AI Index: The compass for AI utility and adoption in the global majority","primary_cat":"cs.CY","submitted_at":"2026-05-29T21:01:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces nexbax, a diagnostic framework with three themes and 10 dimensions for evaluating AI economic viability, operational practicality, and societal integrity in next-billion-user contexts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00306","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Rethinking the Role of Temperature in Large Language Model Distillation","primary_cat":"cs.LG","submitted_at":"2026-05-29T19:32:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Including temperature scaling makes forward KL divergence outperform reverse KL in LLM distillation on instruction benchmarks, overturning the τ=1 preference for reverse KL.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00241","ref_index":210,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"InfoAtlas: A Foundation Model for Zero-Shot Statistical Dependence Estimate","primary_cat":"cs.LG","submitted_at":"2026-05-29T18:16:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"InfoAtlas is a pretrained neural model for zero-shot mutual information estimation that matches state-of-the-art accuracy with 100x speedup and handles varying dimensions via a single model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00230","ref_index":34,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Pre-Training Analogue of Grokking in Language Models: Tracing Delayed Grammatical Generalization","primary_cat":"cs.LG","submitted_at":"2026-05-29T18:04:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"An exposure-based split on BLiMP data reveals delayed generalization in five grammatical phenomena during LLM pre-training, with post-generalization shifts in concept vector predictiveness and attention patterns.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.31371","ref_index":48,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Softsign: Smooth Sign in Your Optimizer For Better Parameter Heterogeneity Handling","primary_cat":"cs.LG","submitted_at":"2026-05-29T14:41:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SoftSignum replaces hard sign with soft-sign in optimizers via temperature control and quantile scheduling, extends to SoftMuon, provides a convergence proof for stochastic non-convex settings, and reports better performance than sign-based methods and AdamW on deep learning tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07604","ref_index":51,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Contribution Weights: A Geometrical Analysis of Self-Attention Transformers","primary_cat":"cs.LG","submitted_at":"2026-05-29T09:40:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Contribution Weights combine attention, value magnitude, and directional alignment to measure token influence more faithfully than attention alone, and show attention sinks actively suppress information via a convex sink-rate to output-norm relationship.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07603","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MetaEvo: A Meta-Optimization Framework for Experience-Driven Agent Evolution","primary_cat":"cs.LG","submitted_at":"2026-05-29T09:31:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"MetaEvo is a two-stage framework using preference optimization for principle abstraction followed by modular reuse to enable continual improvement of LLM agents on reasoning tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07599","ref_index":59,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DiffoR: A Unified Continuous Generative Framework for Universal Ordinal Regression","primary_cat":"cs.LG","submitted_at":"2026-05-29T07:38:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DiffOR reformulates ordinal regression as continuous generative modeling using diffusion models with dual-decoupling to capture soft semantic transitions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30911","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"What Makes LVLMs Hallucinate Less? Unveiling the Architectural Factors Behind Hallucination Robustness","primary_cat":"cs.CV","submitted_at":"2026-05-29T06:47:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"The study links three LVLM architectural dimensions to three hallucination types via a new benchmark, finding that language foundation quality reduces co-occurrence errors, visual encoder strength reduces similarity errors, alignment reduces uncertainty errors, and joint visual-alignment improvement","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30889","ref_index":7,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MLIPilot: LLM-Driven Auto-Research for Machine-Learned Interatomic Potentials","primary_cat":"physics.chem-ph","submitted_at":"2026-05-29T06:25:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MLIPilot deploys LLM agents to autonomously optimize MACE MLIP training on molecular and periodic datasets by proposing code edits and validating against a domain-specific scorecard.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07597","ref_index":63,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Repetition Mismatch: Why Data Mixture Experiments Don't Scale and How to Fix Them","primary_cat":"cs.LG","submitted_at":"2026-05-29T06:08:57+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Repetition rate mismatch between small-scale proxies and target budgets is the main reason data mixture experiments do not scale; a subsampling procedure that equalizes repetition rates recovers optimal mixtures from 1/16-scale experiments.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11232","ref_index":51,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Every Act Has Its Price: Compressed Moral Composition in Frontier LLMs","primary_cat":"cs.CL","submitted_at":"2026-05-29T02:36:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Moral Trolley Arena shows frontier LLMs produce composite moral preferences that are compressed rather than additive functions of calibrated component act strengths across Moral Foundations Theory.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23780","ref_index":41,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Beyond Binary Edits Robust Multimodal Knowledge Editing with Adversarial Subspace Alignment","primary_cat":"cs.AI","submitted_at":"2026-05-22T15:46:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces Latent Adversarial Robustification and Rank-Constrained Subspace Learning to enable robust generalization in multimodal knowledge editing through adversarial subspace alignment.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23640","ref_index":55,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CachePrune: Privacy-Aware and Fine-Grained KV Cache Sharing for Efficient LLM Inference","primary_cat":"cs.CR","submitted_at":"2026-05-22T13:54:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CachePrune enables fine-grained, token-level KV cache reuse across LLM requests by masking sensitive segments, eliminating direct side-channel leakage while cutting TTFT by 4.5x and raising hit rates by 44% versus prior coarse-grained methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23389","ref_index":34,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"AlignedServe: Orchestrating Prefix-aware Batching to Build a High-throughput and Computing-efficient LLM Serving System","primary_cat":"cs.DC","submitted_at":"2026-05-22T09:00:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"AlignedServe uses prefix-aware batching, large CPU in-flight request pools, batch scheduling, and GPU-to-GPU KV prefetching to raise decoding throughput up to 1.98x and cut latency up to 7.4x versus prior serving systems.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23294","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"NASiC: 3D NAND-based CAM-Selected Multibit CIM Architecture for Efficient On-Device Mixture-of-Experts LLM Inference","primary_cat":"cs.AR","submitted_at":"2026-05-22T07:10:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"NASiC fuses CAM-based expert selection and multibit CIM computation in 3D NAND into one cycle for MoE LLM inference, claiming 4-114.8x performance and 3.9-70x energy efficiency gains over prior designs with high accuracy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23219","ref_index":21,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PaP-NF: Probabilistic Long-Term Time Series Forecasting via Prefix-as-Prompt Reprogramming and Normalizing Flows","primary_cat":"cs.LG","submitted_at":"2026-05-22T04:22:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"PaP-NF uses prefix-as-prompt reprogramming of a frozen LLM to extract global context that conditions a normalizing flow decoder, producing probabilistic long-term time series forecasts evaluated by CRPS.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23198","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Label-Efficient Dataset Pruning via Semi-Supervised Pseudo-Labeling","primary_cat":"cs.LG","submitted_at":"2026-05-22T03:29:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SemiPrune uses a small labeled subset and semi-supervised pseudo-labeling to enable supervised dataset pruning methods, achieving state-of-the-art results on domain-specific, image-corrupted, and long-tailed datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23171","ref_index":13,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Understanding and Improving Noisy Embedding Techniques in Instruction Finetuning","primary_cat":"cs.LG","submitted_at":"2026-05-22T02:43:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SymNoise applies symmetric noise to embeddings during instruction fine-tuning and reports 6.7% higher AlpacaEval scores than NEFTune on LLaMA-2-7B.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23061","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Anytime Training with Schedule-Free Spectral Optimization","primary_cat":"cs.LG","submitted_at":"2026-05-21T21:50:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SF-NorMuon is a new schedule-free spectral optimizer that closes the gap with tuned AdamW on 125M-772M parameter models across 1-8x Chinchilla horizons while providing stationarity guarantees.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23054","ref_index":19,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Model Collapse as Cultural Evolution","primary_cat":"cs.CL","submitted_at":"2026-05-21T21:36:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Iterated learning theory predicts and LLM experiments confirm non-monotonic compositionality during self-training, reframing model collapse as cultural transmission with matching human regularization patterns.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23032","ref_index":14,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Brain-LLM Alignment Tracks Training Data, Not Typology","primary_cat":"cs.CL","submitted_at":"2026-05-21T20:56:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Training-language dominance, not English inherent properties, determines brain-LLM alignment across English, Chinese, and French, with additional independent effects from typological distance concentrated in syntactic brain regions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22972","ref_index":66,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A mathematical theory of balancing relational generalization and memorization","primary_cat":"cs.LG","submitted_at":"2026-05-21T19:04:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces transitive inference with exceptions task and analytically shows kernel ridge regression balances relational generalization and memorization depending on representational geometry, with validation in finetuned language models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22819","ref_index":91,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Cambrian-P: Pose-Grounded Video Understanding","primary_cat":"cs.CV","submitted_at":"2026-05-21T17:59:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Cambrian-P adds per-frame camera pose tokens and a regression head to video MLLMs, delivering 4.5-6.5% gains on spatial benchmarks, generalization to other video QA tasks, and SOTA streaming pose estimation on ScanNet.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[89] Peter Tong, Ellis Brown, Penghao Wu, Sanghyun Woo, Adithya Jairam Vedagiri Iyer, Sai Charitha Akula, Shusheng Yang, Jihan Yang, Manoj Middepogu, Ziteng Wang, Xichen Pan, Ziteng Wang, Rob Fergus, Yann LeCun, and Saining Xie. Cambrian-1: A Fully Open, Vision-Centric Exploration of Multimodal LLMs. InNeurIPS, 2024. [90] Fabio Tosi, Yiyi Liao, Carolin Schmitt, and Andreas Geiger. Smd-nets: Stereo mixture density networks. InCVPR, 2021. [91] Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timothée Lacroix, Baptiste Rozière, Naman Goyal, Eric Hambro, Faisal Azhar, et al. Llama: Open and efficient foundation language models.arXiv preprint arXiv:2302.13971, 2023. [92] Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei, Nikolay"},{"citing_arxiv_id":"2605.22769","ref_index":21,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Understanding Data Temporality Impact on Large Language Models Pre-training","primary_cat":"cs.CL","submitted_at":"2026-05-21T17:31:17+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22504","ref_index":31,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LACO: Adaptive Latent Communication for Collaborative Driving","primary_cat":"cs.AI","submitted_at":"2026-05-21T13:54:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LACO introduces Iterative Latent Deliberation, Cross-Horizon Saliency Attribution, and Structured Semantic Knowledge Distillation to enable low-latency latent communication in collaborative driving while preserving performance in CARLA simulations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22403","ref_index":79,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Translating Signals to Languages for sEMG-Based Activity Recognition","primary_cat":"cs.CV","submitted_at":"2026-05-21T12:31:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLM-sEMG maps sEMG signals to language via a dedicated mechanism to enable LLMs to perform accurate activity recognition.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22365","ref_index":126,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"TimeGuard: Channel-wise Pool Training for Backdoor Defense in Time Series Forecasting","primary_cat":"cs.CR","submitted_at":"2026-05-21T11:58:46+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22158","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"ST-SimDiff: Balancing Spatiotemporal Similarity and Difference for Efficient Video Understanding with MLLMs","primary_cat":"cs.AI","submitted_at":"2026-05-21T08:27:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ST-SimDiff is a training-free method using a spatio-temporal graph and dual similarity-difference selection to compress video tokens for MLLMs while retaining static and dynamic content.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22098","ref_index":61,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"TextTeacher: What Can Language Teach About Images?","primary_cat":"cs.CV","submitted_at":"2026-05-21T07:36:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TextTeacher uses frozen text embeddings from captions as semantic anchors to guide vision model training, improving ImageNet accuracy by up to 2.7 p.p. and transfer performance by 1.0 p.p. on average.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22013","ref_index":8,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PointLLM-R: Enhancing 3D Point Cloud Reasoning via Chain-of-Thought","primary_cat":"cs.CV","submitted_at":"2026-05-21T05:19:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"PointLLM-R is a 3D multimodal model fine-tuned on the new 55K-sample PoCoTI CoT dataset built via VLM-based refinement and Human-in-the-Loop Prompt Optimization, achieving SOTA on generative 3D classification and captioning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21987","ref_index":27,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Generative Conversational Recommender System","primary_cat":"cs.IR","submitted_at":"2026-05-21T04:36:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A single autoregressive model for conversational recommendation that uses semantic item IDs, predicts response intent and target first, then generates the response, reporting up to 29% Recall@1 gains.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21969","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LLM Retrieval for Stable and Predictable Ad Recommendations","primary_cat":"cs.IR","submitted_at":"2026-05-21T03:58:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LLM-based semantic retrieval with hierarchical attributes and graph expansion improves stability and predictability in industrial ad recommendation systems.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21683","ref_index":33,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Investigating Concept Alignment Using Implausible Category Members","primary_cat":"cs.AI","submitted_at":"2026-05-20T19:41:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AI models misalign with humans on concept boundaries when probed with implausible category members, such as classifying words as vehicles or vegetables as fruit.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21674","ref_index":48,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Adversarial Reframing: A Framework for Targeted Generation in Language Models","primary_cat":"cs.CR","submitted_at":"2026-05-20T19:31:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"THREAT uses coordinated LLMs in an iterative optimization loop to generate jailbreak prompts that achieve higher success rates and lower detection rates than previous methods across tested models and datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21408","ref_index":40,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"TCARD: Nearly Balanced Two-Level Designs with Treatment Cardinality Constraints with an Application to LLM Prompt Engineering","primary_cat":"stat.ME","submitted_at":"2026-05-20T17:06:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Proposes nearly balanced TCARDs that minimize the first two generalized word-length pattern components, defines Φ_BCD criterion linked to classical optimality, and constructs designs via coordinate exchange with simulation-calibrated weights for LLM prompt engineering.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21372","ref_index":43,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Closed Loop Dynamic Driving Data Mixture for Real-Synthetic Co-Training","primary_cat":"cs.CV","submitted_at":"2026-05-20T16:36:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AutoScale is a closed-loop data engine using Graph-RAE for scene representation and Cluster-GA for importance-based retrieval to improve real-synthetic co-training for autonomous driving.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21338","ref_index":96,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Text Analytics Evaluation Framework: A Case Study on LLMs and Social Media","primary_cat":"cs.CL","submitted_at":"2026-05-20T16:05:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Presents a new question-based evaluation framework for LLMs on aggregated social media text and reports that performance declines with input scale, task complexity, and numerical operations beyond 500 instances.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21260","ref_index":88,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"On the Cost and Benefit of Chain of Thought: A Learning-Theoretic Perspective","primary_cat":"cs.LG","submitted_at":"2026-05-20T14:51:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Chain of Thought risk decomposes into oracle-trajectory benefit and trajectory-mismatch cost, with stability determining bounded, linear, or exponential error growth.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"amplification factor that identifies bounded, linear, and exponential error-growth regimes. Together, these results give a precise theory of when CoT helps, when it hurts, and what controls the transition between the two. 1 Introduction The rise of large language models (LLMs) has reshaped the landscape of AI technologies. Large LLMs, such as ChatGPT [80], LLaMA [88], Claude [6], and Gemini [86], trained on internet-scale data, have demonstrated striking capabilities. Although these models are trained to predict the next word, one at a time in an autoregressive manner [18, 72, 73, 91, 114], they appear to have acquired remarkable abilities to answer user queries. Most remarkably, there is growing evidence that these models can perform forms"},{"citing_arxiv_id":"2605.21147","ref_index":41,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SMoA: Spectrum Modulation Adapter for Parameter-Efficient Fine-Tuning","primary_cat":"cs.LG","submitted_at":"2026-05-20T13:19:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SMoA is a new PEFT adapter that uses block-wise Hadamard-modulated low-rank branches on spectral partitions to cover more pretrained spectral directions than standard LoRA under a smaller parameter budget.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21131","ref_index":40,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"UniT: Unified Geometry Learning with Group Autoregressive Transformer","primary_cat":"cs.CV","submitted_at":"2026-05-20T13:04:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"UniT unifies online and offline 3D geometry perception via a Group Autoregressive Transformer that processes observation groups with anchor-free point map prediction and a scale-adaptive loss.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20950","ref_index":16,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Focus-then-Context: Subject-Centric Progressive Visual Token Reduction for Vision-Language Models","primary_cat":"cs.CV","submitted_at":"2026-05-20T09:37:53+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SPpruner reduces visual tokens in VLMs via focus identification followed by context-aware scanning, retaining 22.2% tokens for 2.53x speedup on Qwen2.5-VL with negligible accuracy loss.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21543","ref_index":165,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Provable Joint Decontamination for Benchmarking Multiple Large Language Models","primary_cat":"cs.LG","submitted_at":"2026-05-20T09:16:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"JECS aggregates per-model conformal p-values via their maximum and reconstructs a conservative envelope of the max-p null distribution to select benchmarks with global contamination rate control.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20761","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Findings of the Counter Turing Test: AI-Generated Text Detection","primary_cat":"cs.CL","submitted_at":"2026-05-20T06:01:17+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20600","ref_index":32,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Head-Aware Key-Value Compression for Efficient Autoregressive Image Generation","primary_cat":"cs.CV","submitted_at":"2026-05-20T01:30:33+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"HeadKV compresses KV cache for autoregressive image generation via head-aware budget allocation, early head-type identification from consistent patterns, and stratified token eviction.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20390","ref_index":22,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"STELLAR: Scaling 3D Perception Large Models for Autonomous Driving","primary_cat":"cs.CV","submitted_at":"2026-05-19T18:40:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"STELLAR trains up to 500M-parameter multi-modal models on 50M driving scenes and reports empirical scaling trends plus new state-of-the-art results on the Waymo Open Dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20369","ref_index":40,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DEL: Digit Entropy Loss for Numerical Learning of Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-05-19T18:18:59+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DEL is a new loss for LLM numerical learning that applies supervised digit entropy optimization and extends to floating-point numbers, showing improved accuracy and distance metrics over prior methods on math benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19976","ref_index":42,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"RECIPE: Procedural Planning via Grounding in Instructional Video","primary_cat":"cs.CV","submitted_at":"2026-05-19T15:20:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"RECIPE improves visual procedural planners by rewarding plans according to their grounding quality in ASR transcripts via GRPO, yielding +7–8 in-domain and up to +16 zero-shot macro-accuracy gains over base models and outperforming supervised fine-tuning on seven benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19568","ref_index":38,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"m3BERT: A Modern, Multi-lingual, Matryoshka Bidirectional Encoder","primary_cat":"cs.CL","submitted_at":"2026-05-19T09:13:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"m3BERT uses a three-stage Matryoshka pretraining approach on a bidirectional encoder to support variable embedding sizes while outperforming prior models on large-scale retrieval tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19538","ref_index":30,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CaptchaMind: Training CAPTCHA Solvers via Reinforcement Learning with Explicit Reasoning Supervision","primary_cat":"cs.CV","submitted_at":"2026-05-19T08:38:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Presents CaptchaBench benchmark and CaptchaMind RL solver achieving 82.9% success on benchmark tasks and 71% on real-world CAPTCHAs via explicit reasoning process supervision.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19514","ref_index":48,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Position: The Turing-Completeness of Autoregressive Transformers Relies Heavily on Context Management","primary_cat":"cs.AI","submitted_at":"2026-05-19T08:12:33+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19481","ref_index":38,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"C2CServe: Leveraging NVLink-C2C for Elastic Serverless LLM Serving on MIG","primary_cat":"cs.OS","submitted_at":"2026-05-19T07:34:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"C2CServe is a request-granularity serverless LLM serving system that keeps weights in host memory and streams them via C2C to MIG instances, cutting cold-start latency up to 7.1x while preserving TTFT/TPOT under contention.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19425","ref_index":28,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"When to Stop Reusing: Dynamic Gradient Gating for Sample-Efficient RLVR","primary_cat":"cs.LG","submitted_at":"2026-05-19T06:23:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Dynamic Gradient Gating monitors lm_head gradient norms to safely reuse rollout batches in RLVR, achieving up to 2.93x sample efficiency and 2.14x wall-clock speedup across math, ALFWorld, WebShop, and QA tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20273","ref_index":16,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Modality-Decoupled Online Recursive Editing","primary_cat":"cs.LG","submitted_at":"2026-05-19T03:11:54+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"M-ORE decouples text and visual update statistics in MLLMs and applies recursive low-rank edits in an orthogonal subspace to reduce cross-modal conflict and long-horizon interference.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19235","ref_index":39,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"GAE Falls Short in Imperfect-Information Self-Play Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-05-19T01:07:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GAE suffers from amplified variance in imperfect-info self-play RL; VRPO with Q-boosting and multi-step Expected SARSA(λ) reduces it and improves performance on mid-to-large games.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19206","ref_index":4,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CLUE: Adaptively Prioritized Contextual Cues by Leveraging a Unified Semantic Map for Effective Zero-Shot Object-Goal Navigation","primary_cat":"cs.RO","submitted_at":"2026-05-19T00:15:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CLUE adaptively weights room-type and object-co-location cues from an LLM to construct a unified semantic value map that improves success rate and efficiency in zero-shot object-goal navigation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19145","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PMF-CL: Pareto-Minimal-Forgetting Continual Learner for Conflicting Tasks","primary_cat":"cs.LG","submitted_at":"2026-05-18T21:53:49+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19119","ref_index":54,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"GOAL: Graph-based Objective-Aligned Diffusion Solvers for Dynamic Multi-Objective Optimization","primary_cat":"cs.NE","submitted_at":"2026-05-18T21:11:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GOAL uses conditioned diffusion on relational graphs with typed edges to produce feasible multi-objective solutions for scheduling problems, reporting 100% feasibility and sub-0.2% MAPE on FSP, JSP, and FJSP up to 20 jobs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19102","ref_index":32,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Prompt Optimization for LLM Code Generation via Reinforcement Learning","primary_cat":"cs.SE","submitted_at":"2026-05-18T20:42:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A PPO agent with hybrid actions and test-driven rewards optimizes prompts for code LLMs, raising strict Pass@1 scores on MBPP+, HumanEval+, and APPS over prior methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20266","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Survey of Large Audio Language Models: Generalization, Trustworthiness, and Outlook","primary_cat":"cs.SD","submitted_at":"2026-05-18T20:21:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A survey of Large Audio Language Models that establishes a taxonomy of trustworthiness vulnerabilities and proposes a Defense-in-Depth roadmap for audio intelligence.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":", \"Train- ing language models to follow instructions with human feed- back,\"Advances in neural information processing systems, vol. 35, pp. 27 730-27 744, 2022. [2] J. Achiam, S. Adler, S. Agarwal, L. Ahmad, I. Akkaya, F. L. Aleman, D. Almeida, J. Altenschmidt, S. Altman, S. Anadkat et al., \"Gpt-4 technical report,\"arXiv preprint arXiv:2303.08774, 2023. [3] H. Touvron, T. Lavril, G. Izacard, X. Martinet, M.-A. Lachaux, T. Lacroix, B. Rozi `ere, N. Goyal, E. Hambro, F. Azharet al., \"Llama: Open and efficient foundation language models,\"arXiv preprint arXiv:2302.13971, 2023. [4] J. Bai, S. Bai, Y. Chu, Z. Cui, K. Dang, X. Deng, Y. Fan, W. Ge, Y. Han, F. Huanget al., \"Qwen technical report,\"arXiv preprint"},{"citing_arxiv_id":"2605.19018","ref_index":24,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LoRA vs. Full Fine-Tuning: A Theoretical Perspective","primary_cat":"cs.LG","submitted_at":"2026-05-18T18:40:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"In linear regression, LoRA can achieve lower excess risk than full fine-tuning when the pretraining-downstream difference is low-rank, and small LoRA ranks can improve generalization by acting as regularization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18678","ref_index":107,"ref_count":4,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Lance: Unified Multimodal Modeling by Multi-Task Synergy","primary_cat":"cs.CV","submitted_at":"2026-05-18T17:18:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Lance presents a dual-stream mixture-of-experts model with modality-aware positional encoding and staged multi-task training that outperforms prior open-source unified models on image and video generation while keeping strong understanding performance.","context_count":2,"top_context_role":"background","top_context_polarity":"background","context_text":"We therefore adopt autoregressive language modeling for understanding and flow matching for generation. Unified Visual Representations vs. Decoupled Visual Representations.Understanding and generation rely on different forms of visual information. Understanding mainly benefits from high-level semantic visual features that are well aligned with language (e.g., SigLIP 2 [108] or Qwen2.5-VL [5]), whereas generation relies on low-level latent representations that preserve appearance and spatiotemporal structure [110]. Some existing works [78] have explored shared visual representations, but a single representation may be insufficient to simultaneously satisfy semantic reasoning and high-fidelity synthesis. Meanwhile, recent studies [143, 148]"},{"citing_arxiv_id":"2605.18390","ref_index":76,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Vision Foundation Models as Generalist Tokenizers for Image Generation","primary_cat":"cs.CV","submitted_at":"2026-05-18T13:38:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"VFMTok builds a generalist image tokenizer on frozen VFMs using adaptive quantization and semantic alignment, delivering gFID 1.36 for autoregressive and 1.25 for continuous generation on ImageNet with 3x faster convergence.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18168","ref_index":42,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Acoustic Interference: A New Paradigm Weaponizing Acoustic Latent Semantic for Universal Jailbreak against Large Audio Language Models","primary_cat":"cs.CR","submitted_at":"2026-05-18T10:10:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AIA generates universal interference audio infused with Acoustic Latent Semantics to bypass LALM safety alignment, achieving SOTA attack success rates on 10 models across five datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18163","ref_index":41,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"TRACE: Trajectory Correction from Cross-layer Evidence for Hallucination Reduction","primary_cat":"cs.AI","submitted_at":"2026-05-18T10:08:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TRACE uses cross-layer candidate trajectories inside frozen LLMs to dynamically select and apply one of three correction operators, delivering mean gains of +12.26 MC1 and +8.65 MC2 points across 15 models and 3 benchmarks with no regressions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18111","ref_index":32,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"How Good LLMs Are at Answering Bangla Medical Visual Questions? Dataset and Benchmarking","primary_cat":"cs.CL","submitted_at":"2026-05-18T09:20:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Introduces BanglaMedVQA dataset of clinically validated image-question-answer pairs and benchmarks foundation models, finding substantially lower performance than on English MedVQA especially on diagnostic questions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18920","ref_index":7,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SynGR: Unleashing the Potential of Cross-Modal Synergy for Generative Recommendation","primary_cat":"cs.IR","submitted_at":"2026-05-18T09:03:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"SynGR is a new framework for generative recommendation that constrains overreliance on single modalities to exploit synergistic cross-modal information for better item semantics and user preference modeling.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18074","ref_index":46,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"4DLidarOpen: An Open 4D FMCW Lidar Dataset for Motion-Aware Autonomous Driving","primary_cat":"cs.RO","submitted_at":"2026-05-18T08:55:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"4DLidarOpen is a new open dataset providing synchronized 4D FMCW Lidar velocity measurements, multi-Lidar and camera data, and 3D bounding-box annotations with track IDs to support benchmarks on 3D detection, BEV segmentation, flow prediction, and motion forecasting.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18071","ref_index":32,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"KVDrive: A Holistic Multi-Tier KV Cache Management System for Long-Context LLM Inference","primary_cat":"cs.CL","submitted_at":"2026-05-18T08:54:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"KVDrive introduces a multi-tier KV cache management system that achieves up to 1.74x higher throughput for long-context LLM inference through adaptive cache placement, pipeline restructuring, and cross-tier coordination while preserving accuracy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17960","ref_index":57,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"From Detection to Response: A Deep Learning and Retrieval-Augmented Generation Framework for Network Intrusion Mitigation","primary_cat":"cs.CR","submitted_at":"2026-05-18T07:17:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Ensemble of three binary DNNs classifies network flows as benign, DoS or DDoS at 99.84% and 95.30% accuracy on CICIDS2018 and UNSW-NB15, paired with RAG to generate mitigation reports that outperform vanilla LLM outputs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17900","ref_index":34,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DuIVRS-2: An LLM-based Interactive Voice Response System for Large-scale POI Attribute Acquisition","primary_cat":"cs.AI","submitted_at":"2026-05-18T06:06:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"DuIVRS-2 deploys an LLM-driven IVR pipeline that processes 0.4 million calls per day at 83.9 percent task success rate using FSM-guided augmentation, selective CoT generation, and cooperative policy iteration.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17842","ref_index":39,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SNLP: Layer-Parallel Inference via Structured Newton Corrections","primary_cat":"cs.LG","submitted_at":"2026-05-18T04:28:16+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17787","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Revisiting the Adam-SGD Gap in LLM Pre-Training: The Role of Large Effective Learning Rates","primary_cat":"cs.LG","submitted_at":"2026-05-18T03:09:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The Adam-SGD gap in large-batch LLM pre-training arises mainly from SGD's restricted effective learning rates caused by small gradients and output-layer spikes; clipping lets SGD recover nearly all of Adam's performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17679","ref_index":62,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PULSE: Agentic Investigation with Passive Sensing for Proactive Intervention in Cancer Survivorship","primary_cat":"cs.HC","submitted_at":"2026-05-17T22:39:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PULSE demonstrates that agentic LLM-based investigation of passive smartphone sensing data achieves balanced accuracies of 0.743 (with diary) and 0.713 (sensing-only) for predicting emotion regulation desire and intervention availability in 50 cancer survivors.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17601","ref_index":133,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"From a Single Demonstration to a General Policy for Contact-Rich Manipulation","primary_cat":"cs.RO","submitted_at":"2026-05-17T18:58:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A one-shot LfD framework abstracts a single demonstration into environmental-constraint primitives, then uses self-exploration, human corrections, and compliant recovery to produce a policy that generalizes across poses and geometries, achieving over 90% success on seven real-world multi-stage tasks","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Babi ˇc, \"Robotic assembly solution by human-in-the-loop teaching method based on real-time stiffness modulation,\"Autonomous Robots, vol. 42, pp. 1-17, 2018. [132] J. Achiam, S. Adler, S. Agarwal, L. Ahmad, I. Akkaya, F. L. Aleman, D. Almeida, J. Altenschmidt, S. Altman, S. Anadkat,et al., \"Gpt-4 technical report,\"arXiv preprint arXiv:2303.08774, 2023. [133] H. Touvron, T. Lavril, G. Izacard, X. Martinet, M.-A. Lachaux, T. Lacroix, B. Rozi`ere, N. Goyal, E. Hambro, F. Azhar,et al., \"Llama: Open and efficient foundation language models,\"arXiv preprint arXiv:2302.13971, 2023. [134] J. Bai, S. Bai, Y . Chu, Z. Cui, K. Dang, X. Deng, Y . Fan, W. Ge, Y . Han, F. Huang,et al., \"Qwen technical report,\"arXiv preprint"},{"citing_arxiv_id":"2605.17565","ref_index":14,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Generalization or Memorization? Brittleness Testing for Chess-Trained Language Models","primary_cat":"cs.AI","submitted_at":"2026-05-17T17:49:07+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A compact 25M chess move predictor exceeds larger fine-tuned models on puzzles, indicating memorization in earlier claims, while LLM-Modulo raises general LLM move accuracy from 1.2% to 21.2% and validity to 95.3%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18904","ref_index":77,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Dynamic Model Merging Made Slim","primary_cat":"cs.LG","submitted_at":"2026-05-17T13:36:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DiDi-Merging achieves dynamic model merging performance matching or exceeding prior methods while using only 1.24x to 1.4x the parameters of a single fine-tuned model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17435","ref_index":2,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"BELIEF: Structured Evidence Modeling and Uncertainty-Aware Fusion for Biomedical Question Answering","primary_cat":"cs.CL","submitted_at":"2026-05-17T12:58:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"BELIEF improves closed-set biomedical QA by converting documents to structured evidence objects and fusing D-S symbolic belief estimation with LLM inference through reliability-aware arbitration.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17428","ref_index":16,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Progressive Generalization Augmentation with Deeply Coupled RND-PPO and Domain-Prioritized Noise Injection for Robust Crop Management Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-05-17T12:48:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Introduces Progressive Generalization Augmentation, deeply coupled RND-PPO, and domain-prioritized noise injection, reporting yield and efficiency gains plus higher retention under temperature perturbations in gym-DSSAT maize tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17413","ref_index":37,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Ablating Safety: Mechanisms for Removing Alignment in Language Models for Security Applications","primary_cat":"cs.CR","submitted_at":"2026-05-17T12:18:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Empirical comparison of alignment ablation methods on a 60-prompt security evaluation suite shows task-only LoRA achieves 0.87 mean security score with 0.13 unsafe compliance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17365","ref_index":19,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Memory-Augmented Query Intent Understanding for Efficient Chat-based Image Retrieval","primary_cat":"cs.CV","submitted_at":"2026-05-17T10:17:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MAQIU adds a memorization module and recall mechanism to update query intent dynamically in chat-based image retrieval, cutting FLOPs by 86.4% versus ChatIR while improving results.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17341","ref_index":54,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Single-Sample Black-Box Membership Inference Attack against Vision-Language Models via Cross-modal Semantic Alignment","primary_cat":"cs.CV","submitted_at":"2026-05-17T09:21:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A cross-modal alignment attack achieves AUC 0.821 for single-sample black-box membership inference on VLMs such as LLaVA-1.5 by quantifying image-generated caption similarity.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":100,"offset":0}}