{"total":14,"items":[{"citing_arxiv_id":"2606.12922","ref_index":5,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Polar: A Benchmark for Evaluating Political Bias in LLMs","primary_cat":"cs.CL","submitted_at":"2026-06-11T05:26:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Polar is a new cross-context benchmark showing LLM political bias measurements are not fixed but vary with country, issue, model, and language.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.12088","ref_index":98,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Debiasing Without Protected Attributes: Latent Concept Erasure from Textual Profiles","primary_cat":"cs.CL","submitted_at":"2026-06-10T13:49:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"H-SAL erases latent concepts from text profiles using self-descriptions as implicit debiasing signals and shows competitive performance on a new multi-domain Stack Exchange helpfulness benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02776","ref_index":48,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Topics as Proxies for Sociodemographics: How Conversational Context Affects LLM Answers","primary_cat":"cs.CL","submitted_at":"2026-06-01T18:38:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"LLMs show minimal sociodemographic disparities in advice because they infer user demographics poorly from history; conversation topics are the main predictor and act as proxies for groups.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00467","ref_index":31,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"On the Limits of LLM Adaptability: Impact of Model-Internalized Priors on Annotation Task Performance","primary_cat":"cs.CL","submitted_at":"2026-05-30T01:21:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLMs correct only 34.8% of zero-shot annotation errors via prompting, and Definition-Specific Familiarity correlates positively with performance (partial r = +0.41) while memorization metrics do not.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07622","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Is She Even Relevant? When BERT Ignores Explicit Gender Cues","primary_cat":"cs.CL","submitted_at":"2026-05-08T11:48:22+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A Dutch BERT model encodes gender linearly by epoch 20 but does not dynamically update its representations when explicit female cues contradict learned stereotypical associations in short sentence templates.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01048","ref_index":18,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Compared to What? Baselines and Metrics for Counterfactual Prompting","primary_cat":"cs.CL","submitted_at":"2026-05-01T19:23:33+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Counterfactual prompting effects on LLMs are often indistinguishable from those caused by meaning-preserving paraphrases, causing most previously reported demographic sensitivities to disappear under proper statistical comparison.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17398","ref_index":6,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Contrastive Analysis of Linguistic Representations in Large Language Model Outputs through Structured Synthetic Data Generation and Abstracted N-gram Associations","primary_cat":"cs.CL","submitted_at":"2026-04-19T12:02:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A methodological framework detects subtle group-associated linguistic biases in LLM outputs by generating controlled synthetic minimal pairs, abstracting n-grams, and ranking high-signal fragments with a PMI variant for expert review.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.05483","ref_index":25,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Can We Trust a Black-box LLM? LLM Untrustworthy Boundary Detection via Bias-Diffusion and Multi-Agent Reinforcement Learning","primary_cat":"cs.AI","submitted_at":"2026-04-07T06:24:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GMRL-BD detects untrustworthy topic boundaries for black-box LLMs by combining bias-diffusion on a Wikipedia KG with multi-agent RL, supported by a released dataset labeling biases in models like Llama2 and Qwen2.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.06213","ref_index":20,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Invisible Influences: Investigating Implicit Intersectional Biases through Persona Engineering in Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-03-16T15:57:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The paper proposes the BADx metric to quantify persona-induced amplification of implicit intersectional biases in five LLMs, showing that context modulates bias beyond what static embedding tests capture.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.04244","ref_index":108,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Benchmark Data Contamination of Large Language Models: A Survey","primary_cat":"cs.CL","submitted_at":"2024-06-06T16:41:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"A survey reviewing benchmark data contamination in LLMs, its impact on evaluation, and alternative assessment approaches.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Another strategy to detecting BDC involves comparing the performance of model-generation on evaluation datasets. Common examples such as comparing the similarity Common methods include , Vol. 1, No. 1, Article . Publication date: June 2024. Benchmark Data Contamination of Large Language Models: A Survey 11 comparing the similarity [82, 96], distribution [34], perplexity [89], and generation order [108] of the generated content with that of the evaluated dataset. Additionally, comparing the performance differences of LLMs on datasets across different time periods can serve as a comparison-based method for detecting BDC [61, 121]. We have identified six representative works that adopt this approach, which we have categorized into three subcategories: content comparison, sequential"},{"citing_arxiv_id":"2305.14233","ref_index":149,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Enhancing Chat Language Models by Scaling High-quality Instructional Conversations","primary_cat":"cs.CL","submitted_at":"2023-05-23T16:49:14+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"UltraChat supplies 1.5 million high-quality multi-turn dialogues that, when used to fine-tune LLaMA, produce UltraLLaMA, which outperforms prior open-source chat models including Vicuna.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2304.01373","ref_index":242,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Pythia: A Suite for Analyzing Large Language Models Across Training and Scaling","primary_cat":"cs.CL","submitted_at":"2023-04-03T20:58:15+00:00","verdict":"ACCEPT","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"Pythia releases 16 identically trained LLMs with full checkpoints and data tools to study training dynamics, scaling, memorization, and bias in language models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2211.09085","ref_index":215,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Galactica: A Large Language Model for Science","primary_cat":"cs.CL","submitted_at":"2022-11-16T18:06:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Galactica, a science-specialized LLM, reports higher scores than GPT-3, Chinchilla, and PaLM on LaTeX knowledge, mathematical reasoning, and medical QA benchmarks while outperforming general models on BIG-bench.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2211.05100","ref_index":288,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"BLOOM: A 176B-Parameter Open-Access Multilingual Language Model","primary_cat":"cs.CL","submitted_at":"2022-11-09T18:48:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"BLOOM is a 176B-parameter open-access multilingual language model trained on the ROOTS corpus that achieves competitive performance on benchmarks, with improved results after multitask prompted finetuning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}