{"total":192,"items":[{"citing_arxiv_id":"2604.27674","ref_index":51,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"One Single Hub Text Breaks CLIP: Identifying Vulnerabilities in Cross-Modal Encoders via Hubness","primary_cat":"cs.CL","submitted_at":"2026-04-30T10:08:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A single hub text can unreasonably match many images in CLIP-based similarity, exposing vulnerabilities in cross-modal encoders for caption evaluation and retrieval.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27607","ref_index":2,"ref_count":2,"confidence":0.35,"is_internal_anchor":false,"paper_title":"JaiTTS: A Thai Voice Cloning Model","primary_cat":"cs.CL","submitted_at":"2026-04-30T08:59:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"JaiTTS-v1.0 achieves 1.94% CER on short Thai speech, beating human ground truth of 1.98%, matches humans on long speech, and wins 283 of 400 human comparisons against commercial systems.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27495","ref_index":42,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Debiasing Reward Models via Causally Motivated Inference-Time Intervention","primary_cat":"cs.CL","submitted_at":"2026-04-30T06:49:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Neuron-level inference-time intervention reduces multiple biases in reward models, enabling 2B and 7B models to match 70B performance on LLM alignment benchmarks without trade-offs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27340","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Investigating More Explainable and Partition-Free Compositionality Estimation for LLMs: A Rule-Generation Perspective","primary_cat":"cs.AI","submitted_at":"2026-04-30T02:33:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A rule-generation perspective lets LLMs write programs as rules for data mapping and applies complexity theory to estimate their compositionality, tested on string-to-grid tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26622","ref_index":33,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"OCR-Memory: Optical Context Retrieval for Long-Horizon Agent Memory","primary_cat":"cs.CL","submitted_at":"2026-04-29T12:49:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"OCR-Memory encodes agent trajectories as images with visual anchors and retrieves verbatim text via locate-and-transcribe, yielding gains on long-horizon benchmarks under strict context limits.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.25860","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Luminol-AIDetect: Fast Zero-shot Machine-Generated Text Detection based on Perplexity under Text Shuffling","primary_cat":"cs.CL","submitted_at":"2026-04-28T16:58:55+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.25693","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"RADD: Retrieval-Augmented Discrete Diffusion for Multi-Modal Knowledge Graph Completion","primary_cat":"cs.AI","submitted_at":"2026-04-28T14:21:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RADD decouples retrieval and reranking in multi-modal KGC via a relation-aware KGE retriever and conditional discrete denoiser, reporting state-of-the-art results on three benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.25525","ref_index":44,"ref_count":2,"confidence":0.35,"is_internal_anchor":false,"paper_title":"From Chatbots to Confidants: A Cross-Cultural Study of LLM Adoption for Emotional Support","primary_cat":"cs.CL","submitted_at":"2026-04-28T11:48:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Cross-cultural survey of 4,641 participants shows LLM emotional support adoption varies widely by country and demographics, with socioeconomic status as strongest predictor of trust and use, and English-speaking nations more accepting than others in Europe.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.24987","ref_index":26,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Assessing Y-Axis Influence: Bias in Multimodal Language Models on Chart-to-Table Translation","primary_cat":"cs.AI","submitted_at":"2026-04-27T20:47:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Y-axis features such as major tick digit length, number of ticks, value range, and format introduce significant biases in multimodal models during chart-to-table tasks, with y-axis prompting improving performance for some models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.24536","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Generating Place-Based Compromises Between Two Points of View","primary_cat":"cs.CL","submitted_at":"2026-04-27T14:33:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Empathic similarity feedback in prompts generates more acceptable compromises than chain-of-thought, and margin-based training on the resulting data lets smaller models produce them without ongoing empathy estimation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23779","ref_index":20,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"GLIER: Generative Legal Inference and Evidence Ranking for Legal Case Retrieval","primary_cat":"cs.IR","submitted_at":"2026-04-26T16:02:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GLIER reformulates legal case retrieval as generative inference over latent legal variables like charges and elements, then fuses generative, structural, and lexical signals, outperforming baselines on LeCaRD datasets with strong performance at 10% training data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23742","ref_index":43,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"RTCFake: Speech Deepfake Detection in Real-Time Communication","primary_cat":"cs.SD","submitted_at":"2026-04-26T14:42:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RTCFake is the first large-scale dataset of real-time communication speech deepfakes paired with offline versions, paired with a phoneme-guided consistency learning method that improves cross-platform and noise-robust detection.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23543","ref_index":41,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Pref-CTRL: Preference Driven LLM Alignment using Representation Editing","primary_cat":"cs.CL","submitted_at":"2026-04-26T05:41:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Pref-CTRL trains a multi-objective value function on preferences to guide representation editing for LLM alignment, outperforming RE-Control on benchmarks with better out-of-domain generalization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23407","ref_index":16,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"PushupBench: Your VLM is not good at counting pushups","primary_cat":"cs.CV","submitted_at":"2026-04-25T18:58:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"VLMs reach only 42.1% exact accuracy on counting pushups in videos, with weaker models exploiting modal counts, and 1k-sample fine-tuning transfers gains to MVBench, PerceptionTest, and TVBench.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23345","ref_index":30,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Bridging Reasoning and Action: Hybrid LLM-RL Framework for Efficient Cross-Domain Task-Oriented Dialogue","primary_cat":"cs.CL","submitted_at":"2026-04-25T15:07:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"VLK-RL verifies LLM-derived constraints and maps them into structured state representations to improve RL performance on long-horizon cross-domain dialogue tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23282","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Bridging the Pose-Semantic Gap: A Cascade Framework for Text-Based Person Anomaly Search","primary_cat":"cs.CV","submitted_at":"2026-04-25T12:53:15+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23267","ref_index":2,"ref_count":2,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Fine-tuning vs. In-context Learning in Large Language Models: A Formal Language Learning Perspective","primary_cat":"cs.CL","submitted_at":"2026-04-25T12:19:25+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A controlled formal language task reveals fine-tuning outperforms in-context learning on in-distribution generalization but equals it on out-of-distribution, with ICL showing greater sensitivity to model size and tokenization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23198","ref_index":29,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"StoryTR: Narrative-Centric Video Temporal Retrieval with Theory of Mind Reasoning","primary_cat":"cs.AI","submitted_at":"2026-04-25T08:09:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"StoryTR is a new benchmark and agentic data pipeline that adds explicit Theory of Mind reasoning chains to train smaller video retrieval models, yielding a 15% relative IoU gain over larger baselines on narrative content.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23194","ref_index":37,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"From Coarse to Fine: Self-Adaptive Hierarchical Planning for LLM Agents","primary_cat":"cs.AI","submitted_at":"2026-04-25T07:54:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"AdaPlan-H enables LLM agents to generate self-adaptive hierarchical plans that adjust detail level to task difficulty, improving success rates in multi-step tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23130","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"From Concept-Aligned Tokens to Vulnerable Features: Mechanistic Localization of Jailbreaks","primary_cat":"cs.CL","submitted_at":"2026-04-25T03:54:03+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23108","ref_index":28,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Mixture of Heterogeneous Grouped Experts for Language Modeling","primary_cat":"cs.CL","submitted_at":"2026-04-25T02:05:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MoHGE achieves standard MoE performance with 20% fewer parameters and balanced GPU utilization via grouped heterogeneous experts, two-level routing, and specialized auxiliary losses.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23051","ref_index":47,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Evaluating Temporal Consistency in Multi-Turn Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-24T22:44:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Language models frequently violate temporal scope stability in multi-turn dialogues by drifting toward present-day assumptions even when they possess the correct facts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22937","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"AutoPyVerifier: Learning Compact Executable Verifiers for Large Language Model Outputs","primary_cat":"cs.CL","submitted_at":"2026-04-24T18:22:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AutoPyVerifier learns compact sets of executable Python verifiers from labeled LLM outputs via LLM synthesis and DAG search, improving objective prediction by up to 55 F1 points and downstream LLM accuracy by up to 17 points.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22678","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"BERAG: Bayesian Ensemble Retrieval-Augmented Generation for Knowledge-based Visual Question Answering","primary_cat":"cs.CL","submitted_at":"2026-04-24T16:01:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"BERAG applies Bayesian ensemble weighting of individual documents via token-by-token posterior updates in retrieval-augmented generation, yielding gains on knowledge-based visual QA tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22597","ref_index":39,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Rethinking Math Reasoning Evaluation: A Robust LLM-as-a-Judge Framework Beyond Symbolic Rigidity","primary_cat":"cs.AI","submitted_at":"2026-04-24T14:25:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"An LLM-as-a-judge evaluation framework for math reasoning outperforms symbolic methods by accurately assessing diverse answer representations and formats.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22558","ref_index":27,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"SOLAR-RL: Semi-Online Long-horizon Assignment Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-04-24T13:53:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SOLAR-RL assigns dense step-level rewards from static trajectory data by detecting first failure points and applying target-aligned shaping to improve long-horizon GUI task completion without full online interactions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22888","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"RouteGuard: Internal-Signal Detection of Skill Poisoning in LLM Agents","primary_cat":"cs.CR","submitted_at":"2026-04-24T09:07:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RouteGuard uses response-conditioned attention and hidden-state alignment to detect skill poisoning in LLM agents, achieving 0.8834 F1 on Skill-Inject benchmarks and recovering 90.51% of attacks missed by lexical screening.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22192","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"CharTide: Data-Centric Chart-to-Code Generation via Tri-Perspective Tuning and Inquiry-Driven Evolution","primary_cat":"cs.CV","submitted_at":"2026-04-24T03:39:51+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22166","ref_index":39,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Fine-Grained Analysis of Shared Syntactic Mechanisms in Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-24T02:29:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Language models employ a highly localized shared mechanism for filler-gap dependencies but no unified mechanism for NPI licensing, and activation patching generalizes better than supervised alignment search.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17105","ref_index":57,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"How Tokenization Limits Phonological Knowledge Representation in Language Models and How to Improve Them","primary_cat":"cs.CL","submitted_at":"2026-04-18T18:40:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Subword tokenization impairs phonological knowledge encoding in LMs, but an IPA-based fine-tuning method restores it with minimal impact on other capabilities.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17008","ref_index":30,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"BIASEDTALES-ML: A Multilingual Dataset for Analyzing Narrative Attribute Distributions in LLM-Generated Stories","primary_cat":"cs.CL","submitted_at":"2026-04-18T14:39:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"BiasedTales-ML provides a parallel multilingual corpus of LLM-generated children's stories that reveals substantial cross-lingual differences in narrative attributes not captured by English-centric analyses.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16995","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"SPS: Steering Probability Squeezing for Better Exploration in Reinforcement Learning for Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-18T13:49:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SPS interleaves RL and IRL to counteract probability squeezing in LLM reasoning trajectories, improving Pass@k on five benchmarks while identifying an empirical upper bound on multi-sample performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16930","ref_index":42,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"CoGR-MoE: Concept-Guided Expert Routing with Consistent Selection and Flexible Reasoning for Visual Question Answering","primary_cat":"cs.CV","submitted_at":"2026-04-18T09:28:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CoGR-MoE improves VQA by using concept-guided expert routing with option feature reweighting and contrastive learning to achieve consistent yet flexible reasoning across answer options.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16902","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Beyond Text-Dominance: Understanding Modality Preference of Omni-modal Large Language Models","primary_cat":"cs.AI","submitted_at":"2026-04-18T08:25:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Omni-modal LLMs exhibit visual preference that emerges in mid-to-late layers, enabling hallucination detection without task-specific training.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16686","ref_index":28,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"No-Worse Context-Aware Decoding: Preventing Neutral Regression in Context-Conditioned Generation","primary_cat":"cs.CL","submitted_at":"2026-04-17T20:45:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"NWCAD uses a two-stream setup with a two-stage gate to prevent accuracy drops on baseline-correct items under non-informative contexts while retaining gains from helpful contexts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16593","ref_index":90,"ref_count":2,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Revisiting a Pain in the Neck: A Semantic Reasoning Benchmark for Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-17T17:56:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"SemanticQA unifies prior multiword expression datasets into a benchmark that reveals substantial performance variation among language models on semantic reasoning tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16058","ref_index":27,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"LLMSniffer: Detecting LLM-Generated Code via GraphCodeBERT and Supervised Contrastive Learning","primary_cat":"cs.SE","submitted_at":"2026-04-17T13:32:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"LLMSniffer improves detection of LLM-generated code on GPTSniffer and Whodunit benchmarks by fine-tuning GraphCodeBERT via two-stage supervised contrastive learning plus preprocessing and MLP classification.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16029","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Cut Your Losses! Learning to Prune Paths Early for Efficient Parallel Reasoning","primary_cat":"cs.CL","submitted_at":"2026-04-17T13:00:22+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15873","ref_index":51,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"How Hypocritical Is Your LLM judge? Listener-Speaker Asymmetries in the Pragmatic Competence of Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-17T09:22:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLMs perform substantially better as pragmatic listeners judging language than as speakers generating it, revealing weak alignment between the two roles.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15847","ref_index":46,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"CiPO: Counterfactual Unlearning for Large Reasoning Models through Iterative Preference Optimization","primary_cat":"cs.CL","submitted_at":"2026-04-17T08:56:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CiPO removes undesired knowledge from both intermediate reasoning steps and final answers in large reasoning models by iteratively optimizing preferences toward valid counterfactual traces while keeping overall reasoning performance intact.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15741","ref_index":62,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Learning Uncertainty from Sequential Internal Dispersion in Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-17T06:31:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SIVR detects LLM hallucinations by learning from token-wise and layer-wise variance patterns in internal hidden states, outperforming baselines with better generalization and less training data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16543","ref_index":52,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Conjunctive Prompt Attacks in Multi-Agent LLM Systems","primary_cat":"cs.MA","submitted_at":"2026-04-17T02:31:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Conjunctive prompt attacks split adversarial elements across agents and routing paths in multi-agent LLM systems, evading isolated defenses and succeeding through topology-aware optimization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15607","ref_index":72,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Imperfectly Cooperative Human-AI Interactions: Comparing the Impacts of Human and AI Attributes in Simulated and User Studies","primary_cat":"cs.CL","submitted_at":"2026-04-17T01:10:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"In real human subjects, AI transparency impacts imperfectly cooperative interactions far more than personality traits, unlike simulations where both are comparably influential.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15602","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"GroupDPO: Memory efficient Group-wise Direct Preference Optimization","primary_cat":"cs.CL","submitted_at":"2026-04-17T00:56:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GroupDPO decouples group-wise preference optimization during backpropagation to cut peak memory while keeping the same gradients, allowing larger groups and consistent gains over single-pair DPO plus an NLL term on positives.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15301","ref_index":76,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Think in Latent Thoughts: A New Paradigm for Gloss-Free Sign Language Translation","primary_cat":"cs.CV","submitted_at":"2026-04-16T17:57:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A new SLT framework uses latent thoughts as a middle reasoning layer and plan-then-ground decoding to improve coherence and faithfulness in gloss-free sign language translation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15188","ref_index":35,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"VisPCO: Visual Token Pruning Configuration Optimization via Budget-Aware Pareto-Frontier Learning for Vision-Language Models","primary_cat":"cs.CV","submitted_at":"2026-04-16T16:21:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"VisPCO uses continuous relaxation, straight-through estimators, and budget-aware Pareto-frontier learning to automatically discover optimal visual token pruning configurations that approximate grid-search results across VLMs and benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15109","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"IUQ: Interrogative Uncertainty Quantification for Long-Form Large Language Model Generation","primary_cat":"cs.CL","submitted_at":"2026-04-16T15:03:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"IUQ quantifies claim-level uncertainty in long-form LLM generation by combining inter-sample consistency and intra-sample faithfulness through an interrogate-then-respond approach and outperforms baselines on two datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15041","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"HintPilot: LLM-based Compiler Hint Synthesis for Code Optimization","primary_cat":"cs.SE","submitted_at":"2026-04-16T14:07:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"HintPilot synthesizes semantics-preserving compiler hints via retrieval-augmented LLM generation and profiling-guided refinement, delivering up to 6.88x geometric mean speedup over -Ofast on PolyBench and HumanEval-CPP while preserving correctness.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15022","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Route to Rome Attack: Directing LLM Routers to Expensive Models via Adversarial Suffix Optimization","primary_cat":"cs.CR","submitted_at":"2026-04-16T13:51:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"R²A uses a hybrid ensemble surrogate router and suffix optimization to significantly increase black-box LLM router selection of expensive models across query distributions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.14922","ref_index":38,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"LongAct: Harnessing Intrinsic Activation Patterns for Long-Context Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-04-16T12:06:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LongAct uses saliency from high-magnitude activations to guide sparse weight updates in long-context RL, yielding about 8% gains on LongBench v2 across multiple algorithms.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}