{"total":20,"items":[{"citing_arxiv_id":"2605.20740","ref_index":72,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Distribution-Aware Reward: Reinforcement Learning over Predictive Distributions for LLM Regression","primary_cat":"cs.LG","submitted_at":"2026-05-20T05:43:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Distribution-Aware Reward optimizes LLM regression by treating rollouts as empirical predictive distributions and rewarding marginal improvements in CRPS quality rather than point accuracy alone.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20363","ref_index":15,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Mapping the Winds of Stance Dynamics using Potential Landscape Models","primary_cat":"cs.SI","submitted_at":"2026-05-19T18:16:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A framework uses stance detection, linear dimensionality reduction, and neural potential landscapes to recover a 3D stance space explaining 45% variance and to visualize large-scale shifts across platforms and years.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19939","ref_index":5,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Uncertainty-aware Machine Learning Interatomic Potentials via Learned Functional Perturbations","primary_cat":"cs.CE","submitted_at":"2026-05-19T15:00:06+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19228","ref_index":27,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Diagnosing Multi-step Reasoning Failures in Black-box LLMs via Stepwise Confidence Attribution","primary_cat":"cs.CL","submitted_at":"2026-05-19T00:57:51+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18354","ref_index":30,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Decoupled Conformal Optimisation: Efficient Prediction Sets via Independent Tuning and Calibration","primary_cat":"cs.LG","submitted_at":"2026-05-18T13:10:21+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DCO decouples tuning for efficiency from calibration for coverage in conformal prediction, maintaining marginal guarantees and reducing average set sizes on benchmarks like ImageNet-A and Diabetes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14527","ref_index":30,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Lang2MLIP: End-to-End Language-to-Machine Learning Interatomic Potential Development with Autonomous Agentic Workflows","primary_cat":"cs.LG","submitted_at":"2026-05-14T08:10:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Lang2MLIP is an LLM multi-agent framework that automates end-to-end development of machine learning interatomic potentials from natural language input for heterogeneous materials systems.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13131","ref_index":137,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"ERPPO: Entropy Regularization-based Proximal Policy Optimization","primary_cat":"cs.LG","submitted_at":"2026-05-13T08:01:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"ERPPO adds a DSA-based ambiguity estimator to MAPPO and switches between L1 and L2 entropy regularization to improve exploration and stability in non-stationary multi-dimensional observations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12201","ref_index":18,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Uncertainty Quantification for LLM-based Code Generation","primary_cat":"cs.SE","submitted_at":"2026-05-12T14:40:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RisCoSet applies multiple hypothesis testing to construct risk-controlling partial-program prediction sets for LLM code generation, achieving up to 24.5% less code removal than prior methods at equivalent risk levels.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10196","ref_index":30,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Many Needles in a Haystack: Active Hit Discovery for Perturbation Experiments","primary_cat":"cs.LG","submitted_at":"2026-05-11T08:45:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Probability-of-Hit acquisition function ranks perturbation candidates by posterior probability of threshold exceedance, with asymptotic optimality proof and up to 6.4% gains on real immunology data.","context_count":1,"top_context_role":"background","top_context_polarity":"unclear","context_text":"The experiment proceeds over T rounds. At each round t= 1, . . . , T , the experimenter selects a batch Bt ⊂ G of fixed size b and observes noisy evaluations {yg :g∈B t}, where yg =f(g) +ε g, andε g denotes observation noise. Let B+ t :={g∈B t |f(g)> τ} denote the hits discovered at round t, and define the cumu- lative discovered hit set as GT := T[ t=1 B+ t . Our goal is to maximize hit recovery, measured either as the absolute number of discovered hits|G T |or as the hit ratio HitRatioT := |GT | |G⋆| . 3.3. Sequential and Batch Protocol We consider a batch setting motivated by modern high- throughput experiments, where multiple perturbations can be executed in parallel. Importantly, selections within a 3 Many Needles in a Haystack: Active Hit Discovery for Perturbation Experiments batch must be made without access to the true outcomes of other perturbations in the same batch. Let nt =tb denote the total number of perturbations evalu- ated up to round t. At each round, the experimenter condi- tions on the dataset Dt :={(g, y g) :g∈B s, s < t} to guide the selection ofB t. 3.4. Predictive Model We assume access to a probabilistic predictive model that, conditioned on Dt, provides a posterior predictive distribu- tion for eachg∈ G, characterized by a meanµ t(g)and un- certainty σt(g). Our algorithms are agnostic to the specific choice of model, and this abstraction encompasses Gaussian processes (Rasmussen & Williams, 2006), Bayesian neural networks (Neal, 1996; Blundell et al., 2015), and neural sur- rogates equipped with calibrated uncertainty estimates (Gal & Ghahramani, 2016; Lakshminarayanan et al., 2017). Crucially, the predictive model induces a posterior distribu- tion over the unknown response function f. This enables reasoning about counterfactual observations and supports acquisition strategies based on expected utility or posterior sampling. 3.5. Challenges This setting presents several challenges that distinguish hit discovery from classical op"},{"citing_arxiv_id":"2605.09183","ref_index":60,"ref_count":2,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Learning When to Stop: Selective Imitation Learning Under Arbitrary Dynamics Shift","primary_cat":"cs.LG","submitted_at":"2026-05-09T21:48:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SeqRejectron constructs a stopping rule with a small set of validator policies to achieve horizon-free sample complexity for selective imitation learning under arbitrary dynamics shifts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08574","ref_index":34,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Post-hoc Selective Classification for Reliable Synthetic Image Detection","primary_cat":"cs.CV","submitted_at":"2026-05-09T00:25:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ReSIDe generalizes logit-based confidence scores to intermediate layers of synthetic image detectors and uses preference optimization to aggregate them, cutting area under the risk-coverage curve by up to 69.55% under covariate shifts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06413","ref_index":7,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Decoupled PFNs: Identifiable Epistemic-Aleatoric Decomposition via Structured Synthetic Priors","primary_cat":"stat.ML","submitted_at":"2026-05-07T15:22:35+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Decoupled PFNs use controllable synthetic priors to train separate latent-signal and noise heads, making epistemic-aleatoric decomposition identifiable and improving acquisition in noisy settings.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08202","ref_index":42,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Beyond Penalization: Diffusion-based Out-of-Distribution Detection and Selective Regularization in Offline Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-05-06T01:21:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DOSER detects OOD actions via diffusion-model denoising error and applies selective regularization based on predicted transitions, proving gamma-contraction with performance bounds and outperforming priors on offline RL benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.03399","ref_index":29,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"PODiff: Latent Diffusion in Proper Orthogonal Decomposition Space for Scientific Super-Resolution","primary_cat":"cs.LG","submitted_at":"2026-05-05T06:21:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"PODiff performs conditional diffusion in a fixed, variance-ordered POD latent space to enable efficient probabilistic super-resolution of high-dimensional scientific fields with lower memory and better-calibrated uncertainty than pixel-space or dropout baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01632","ref_index":14,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Perturb and Correct: Post-Hoc Ensembles using Affine Redundancy","primary_cat":"cs.LG","submitted_at":"2026-05-02T22:48:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Perturb-and-Correct generates epistemically diverse predictors from a single pretrained network via hidden-layer perturbations followed by affine least-squares corrections that enforce agreement on calibration data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01346","ref_index":2,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"CHASE: Competing Hypotheses for Ambiguity-Aware Selective Prediction","primary_cat":"cs.CV","submitted_at":"2026-05-02T09:39:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CHASE improves selective prediction under ambiguity by optimizing a ranking-aware selector over margins between competing temporal hypotheses, yielding up to 11% better alignment and 8.8% higher three-way accuracy than baselines on GUV-inspired tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00600","ref_index":44,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Possibilistic Predictive Uncertainty for Deep Learning","primary_cat":"cs.LG","submitted_at":"2026-05-01T12:14:01+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20256","ref_index":61,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"RADS: Reinforcement Learning-Based Sample Selection Improves Transfer Learning in Low-resource and Imbalanced Clinical Settings","primary_cat":"cs.CL","submitted_at":"2026-04-22T07:05:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"RADS applies reinforcement learning to pick informative samples for transfer learning, improving performance over uncertainty and diversity sampling in low-resource imbalanced clinical settings.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17725","ref_index":79,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"RePrompT: Recurrent Prompt Tuning for Integrating Structured EHR Encoders with Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-20T02:20:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RePrompT uses recurrent prompt tuning to inject prior-visit latent states and cohort-derived population prompt tokens into LLMs, yielding better performance than pure EHR or pure LLM baselines on MIMIC clinical prediction tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21493","ref_index":12,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Don't Collapse Your Features: Why CenterLoss Hurts OOD Detection and Multi-Scale Mahalanobis Wins","primary_cat":"cs.LG","submitted_at":"2026-04-10T18:27:46+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Avoiding CenterLoss improves OOD detection via multi-scale Mahalanobis on L2-normalized features, yielding 0.9483 AUROC on CIFAR-10 while preserving competitive in-distribution accuracy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}