{"total":12,"items":[{"citing_arxiv_id":"2606.24551","ref_index":141,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"GUI vs. CLI: Execution Bottlenecks in Screen-Only and Skill-Mediated Computer-Use Agents","primary_cat":"cs.AI","submitted_at":"2026-06-22T17:05:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A matched benchmark shows GUI computer-use agents at 59.1% full pass rate versus 48.2% for original-skill CLI agents, rising to 69.3% with verifier-guided augmentation, indicating modality-specific execution bottlenecks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18005","ref_index":233,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"LLM Consumer Behavior Theory: Foundations of a Novel Research Field","primary_cat":"cs.AI","submitted_at":"2026-06-16T14:51:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Introduces LLM Consumer Behavior Theory to analyze consumer behavior when LLMs serve as autonomous decision-making agents in markets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.06738","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Modular Monolingual Adaptation using Pretrained Language Models","primary_cat":"cs.CL","submitted_at":"2026-06-04T21:51:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Replacing tokens, freezing the corresponding embeddings, and tuning the rest of the model improves NLU performance on low-resource languages compared to full fine-tuning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04177","ref_index":116,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A Systematic Analysis of Linguistic Features in AI-Generated Text Detection Across Domains and Models","primary_cat":"cs.CL","submitted_at":"2026-06-02T19:46:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"MODERATE","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Lexical richness is a robust linguistic signal for AI-generated text detection across models and domains, while most other features are context-dependent.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10714","ref_index":128,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Why Low-Resource NLP Needs More Than Cross-Lingual Transfer: Lessons Learned from Luxembourgish","primary_cat":"cs.CL","submitted_at":"2026-05-11T15:24:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Cross-lingual transfer and language-specific data efforts are interdependent and complementary for effective low-resource NLP, as demonstrated through Luxembourgish case studies and synthesis.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05003","ref_index":116,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Misaligned by Reward: Socially Undesirable Preferences in LLMs","primary_cat":"cs.CL","submitted_at":"2026-05-06T15:04:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Reward models for LLMs frequently select socially undesirable options across four social domains, show no overall best performer, and exhibit a bias-avoidance versus context-sensitivity trade-off.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04157","ref_index":200,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"FMI_SU_Yotkova_Kastreva at SemEval-2026 Task 13: Lightweight Detection of LLM-Generated Code via Stylometric Signals","primary_cat":"cs.CL","submitted_at":"2026-05-05T18:00:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"A feature-based decision tree with parsing-derived signals and heuristics detects LLM-generated code in a lightweight, CPU-only setup for SemEval-2026 Task 13.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02712","ref_index":129,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"mdok-style at SemEval-2026 Task 10: Finetuning LLMs for Conspiracy Detection","primary_cat":"cs.CL","submitted_at":"2026-05-04T15:17:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"Finetuning Qwen3-32B with data augmentation and self-training achieves competitive 8th-place ranking on SemEval-2026 conspiracy detection.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02695","ref_index":128,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"mdok-style at SemEval-2026 Task 9: Finetuning LLMs for Multilingual Polarization Detection","primary_cat":"cs.CL","submitted_at":"2026-05-04T15:08:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"Finetuning LLMs with QLoRA and multilingual data augmentation for polarization detection, type, and manifestation in SemEval-2026 Task 9.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01017","ref_index":116,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Psychologically Potent, Computationally Invisible: LLMs Generate Social-Comparison-Eliciting Posts They Fail to Detect","primary_cat":"cs.CL","submitted_at":"2026-05-01T18:28:10+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21716","ref_index":115,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"From If-Statements to ML Pipelines: Revisiting Bias in Code-Generation","primary_cat":"cs.CL","submitted_at":"2026-04-23T14:22:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"LLM-generated ML pipelines show higher bias (87.7% sensitive attributes) than conditional statements (59.2%), indicating that simple if-statement tests underestimate bias risk in practical code generation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21365","ref_index":127,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"mcdok at SemEval-2026 Task 13: Finetuning LLMs for Detection of Machine-Generated Code","primary_cat":"cs.LG","submitted_at":"2026-04-23T07:29:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"Fine-tuning LLMs by adapting the mdok approach produces competitive results on binary detection, source attribution, and hybrid/adversarial code identification in SemEval-2026 Task 13.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}