{"total":26,"items":[{"citing_arxiv_id":"2605.21301","ref_index":40,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Automatic Discovery of Disease Subgroups by Contrasting with Healthy Controls","primary_cat":"cs.LG","submitted_at":"2026-05-20T15:31:16+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Deep UCSL uses a contrastive EM loss on patient-control labels to isolate disease-driven subgroups in medical imaging by suppressing shared healthy variability.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20871","ref_index":131,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Robustness Analysis of USmorph: II. Optimizing Feature Extraction, Dimensionality Reduction, and Clustering for Unsupervised Galaxy Morphology Classification","primary_cat":"astro-ph.GA","submitted_at":"2026-05-20T08:08:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Optimizes ImageNet-pretrained AlexNet, UMAP, and a bagging multi-cluster voting scheme with K-means, Birch and Agg for unsupervised galaxy morphology classification, reporting improved stability and consistency with galaxy evolution expectations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20473","ref_index":69,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Code Generation by Differential Test Time Scaling","primary_cat":"cs.SE","submitted_at":"2026-05-19T20:39:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DiffCodeGen clusters code candidates by behavioral similarity from fuzzing-synthesized inputs and selects the largest cluster's medoid, matching or exceeding prior test-time scaling methods with far less token and time cost.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17907","ref_index":48,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"One Model to Translate Them All: Universal Any-to-Any Translation for Heterogeneous Collaborative Perception","primary_cat":"cs.CV","submitted_at":"2026-05-18T06:14:30+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"UniTrans pretrains a bank of translator experts and learns combination coefficients from modality mappings in a scene-invariant latent space to enable zero-shot any-to-any feature translation for heterogeneous collaborative perception.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17895","ref_index":40,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Geometric Prototype Learning in Quantum Hilbert Space with Matrix Product States","primary_cat":"quant-ph","submitted_at":"2026-05-18T06:00:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A quantum prototype learning scheme encodes class representatives as generative matrix product states and performs classification and clustering via geometric measures in Hilbert space, outperforming classical prototypes on Fashion-MNIST and ECG data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15325","ref_index":61,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"COPRA: Conditional Parameter Adaptation with Reinforcement Learning for Video Anomaly Detection","primary_cat":"cs.CV","submitted_at":"2026-05-14T18:39:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"COPRA introduces conditional parameter adaptation via RL to dynamically tune frozen VLMs for video anomaly detection, outperforming static methods in in-domain and cross-domain settings while generalizing to other video tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06968","ref_index":32,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"On Similarity of Computational Kernels in our Codes and Proxies","primary_cat":"cs.DC","submitted_at":"2026-05-07T21:36:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"New hardware-usage-based similarity metrics can identify matching computational kernels between proxy applications and performance suites on both CPU and GPU systems.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08242","ref_index":18,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"An Explainable Unsupervised-to-Supervised Machine Learning Framework for Dietary Pattern Discovery Using UK National Dietary Survey Data","primary_cat":"q-bio.QM","submitted_at":"2026-05-07T09:05:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"An unsupervised-to-supervised ML pipeline on UK NDNS data discovers four dietary patterns, reproduces them with macro-F1 0.963 using a surrogate classifier, and interprets them via SHAP for potential clinical use.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"hierarchical methods can reveal nested structure [15]. Because different algorithms impose different assumptions, comparing multiple methods and cluster numbers is preferable to relying on a single run [16-17]. Internal validation measures such as silhouette score, Davies-Bouldin index and Calinski- Harabasz index provide quantitative evidence [18-20], although final selection also requires interpretability and practical cluster sizes. Explainable machine learning is particularly important in applied health contexts. Feature- importance methods and SHAP values help identify which variables drive model decisions [21]. In nutrition and dietetics, this matters because the value of an AI-derived pattern depends not only on"},{"citing_arxiv_id":"2605.03619","ref_index":63,"ref_count":2,"confidence":0.88,"is_internal_anchor":false,"paper_title":"The Infinite Mutation Engine? Measuring Polymorphism in LLM-Generated Offensive Code","primary_cat":"cs.CR","submitted_at":"2026-05-05T10:44:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A single commercial LLM can cheaply generate large populations of behaviorally equivalent yet structurally diverse malware payloads.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08178","ref_index":29,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Generalized Category Discovery in Federated Graph Learning","primary_cat":"cs.LG","submitted_at":"2026-05-05T08:37:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GCD-FGL mitigates neighborhood absorption and global semantic inconsistency in federated generalized category discovery, delivering +4.86 average HRScore gain over baselines on five graph datasets.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"Step 2: Hierarchical Cluster for Server Category Discovery.Local known category prototypes  𝑘𝑛𝑜𝑤𝑛 𝑖 are absorbed during Step 1 aggregation. Therefore, only the unabsorbedlocalnovelprototypes  𝑛𝑜𝑣𝑒𝑙 𝑖 areinjectedintothe globaldiscoverypool 𝑛𝑜𝑣 = ⋃ 𝑖∈𝑆𝑡  𝑛𝑜𝑣𝑒𝑙 𝑖 .Toextractglobal candidate centers𝑐𝑎𝑛𝑑 for novel categories, we introduce a penalized Silhouette score Rousseeuw (1987) to find the optimal Threshold Cut𝜃∗ on the Constrained Hierarchical Clustering Dendrogram. Intuition.Explicitly penalizing the cluster count enforces compact, meaningful semantic groupings and rejects frag- mented boundaries from isolated clients. 𝜃∗ = arg max 𝜃 (𝑆𝑠𝑖𝑙(𝜃) −𝜆 ℎ𝑐 ⋅Ω(𝜃) ).(15) where Ω(𝜃) = max(0, 𝑁 𝑐𝑙𝑢𝑠𝑡 (𝜃) − (| 𝑘𝑛𝑜𝑤𝑛|+ 2)) repre-"},{"citing_arxiv_id":"2605.00637","ref_index":42,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Class Angular Distortion Index for Dimensionality Reduction","primary_cat":"cs.LG","submitted_at":"2026-05-01T13:19:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CADI quantifies the preservation of relative cluster angles in low-dimensional projections using internal angles from point triples.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00069","ref_index":92,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Soft-MSM: Differentiable Context-Aware Elastic Alignment for Time Series","primary_cat":"cs.LG","submitted_at":"2026-04-30T11:01:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Soft-MSM is a smooth, gradient-enabled version of the context-aware MSM distance for time series alignment that outperforms Soft-DTW alternatives in clustering and nearest-centroid classification.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23342","ref_index":68,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Empirical Insights of Test Selection Metrics under Multiple Testing Objectives and Distribution Shifts","primary_cat":"cs.SE","submitted_at":"2026-04-25T15:05:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A broad empirical benchmark shows how 15 existing test selection metrics perform for fault detection, performance estimation, and retraining under corrupted, adversarial, temporal, natural, and label shifts across image, text, and Android data.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"For malware data, this is commonly observed in data collected at different times, known as temporal covariate shift [24, 57]. Definition.Covariate shift is defined as the case where 𝑃𝑖𝑑 (𝑦|𝑥)=𝑃 𝑜𝑜𝑑 (𝑦|𝑥) and 𝑃𝑖𝑑 (𝑥)≠𝑃 𝑜𝑜𝑑 (𝑥) . For MNIST, IMDb, and Udacity datasets, we use three types of covariate shifts: corrupted, adversarial, and natural shifts [68]. For AndroZoo (data collected in 2017 [3]), we use temporal (data in 2018 and 2019 [3]), adversarial, and natural shifts. To simulate a generalized adversarial scenario, for MNIST, Udacity, and AndroZoo, we use a combination of adversarial images generated by FGSM [23], BIM [40], and PGD [ 54], on three equal parts, respectively. For IMDb, we use Probability"},{"citing_arxiv_id":"2604.22986","ref_index":36,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A Machine Learning Approach to Meteor Classification","primary_cat":"astro-ph.EP","submitted_at":"2026-04-24T19:56:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Machine learning clustering of meteor observations produces a new hardness classification H_class that refines traditional Kb models using more parameters and reveals compositional structure in meteoroid populations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22890","ref_index":19,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AI-Derived Reproductive Phenotypes and Explainable ML for Concurrent Early Multimorbidity in U.S. Women: NHANES 2017-March 2020","primary_cat":"q-bio.OT","submitted_at":"2026-04-24T09:36:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"PCA and k-means on NHANES data identified four reproductive phenotypes in U.S. women aged 20-44, with one fragile subgroup showing 77.5% early multimorbidity prevalence; XGBoost improved discrimination over logistic regression but had worse calibration.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19685","ref_index":47,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"An Answer is just the Start: Related Insight Generation for Open-Ended Document-Grounded QA","primary_cat":"cs.CL","submitted_at":"2026-04-21T17:07:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"InsightGen uses thematic clustering and graph neighborhood selection to generate diverse, relevant insights for open-ended document-grounded questions and releases the SCOpE-QA dataset of 3000 questions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18769","ref_index":86,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"ClusterRAG: Cluster-Based Collaborative Filtering for Personalized Retrieval-Augmented Generation","primary_cat":"cs.IR","submitted_at":"2026-04-14T01:52:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ClusterRAG applies density-based clustering to user profiles for collaborative retrieval in personalized RAG and reports best performance on LaMP tasks by combining target and similar-user profiles.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12049","ref_index":44,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Leveraging Weighted Syntactic and Semantic Context Assessment Summary (wSSAS) Towards Text Categorization Using LLMs","primary_cat":"cs.CL","submitted_at":"2026-04-13T20:41:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"wSSAS is a two-phase deterministic framework that uses hierarchical text organization and SNR-based feature prioritization to improve clustering integrity, categorization accuracy, and reproducibility when applying LLMs to large review datasets.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Metric Description Interpretation Goal Silhouette Score [42] Measures cohesion vs. separation for each sample. Range:[−1,+1] +1: Well-separated 0: Overlapping -1: Misassigned Maximize Davies-Bouldin Index [43] Calculates average similarity be- tween clusters. Lower score indicates better sepa- ration and compactness. 0 is the minimum. Minimize Calinski-Harabasz Index [44] Ratio of between-cluster dispersion to within-cluster dispersion. Higher score indicates dense and well-separated clusters. Maximize 7 wSSAS: A Framework for Improved Text Categorization and Summarization using LLMs These metrics mathematically confirm whether the wSSAS context summary enables the LLM to identify distinct, compact, and meaningful categories."},{"citing_arxiv_id":"2604.07891","ref_index":45,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AFGNN: API Misuse Detection using Graph Neural Networks and Clustering","primary_cat":"cs.SE","submitted_at":"2026-04-09T07:01:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AFGNN detects API misuses in Java code more effectively than prior methods by representing usage as graphs and clustering learned embeddings from self-supervised training.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.23142","ref_index":46,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Do Good, Stay Longer? Temporal Patterns and Predictors of Newcomer-to-Core Transitions in Conventional OSS and OSS4SG","primary_cat":"cs.SE","submitted_at":"2026-01-30T16:30:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"OSS4SG projects retain contributors at 2.2X higher rates with 19.6% higher core status probability than conventional OSS, and a late-spike temporal pattern enables faster core achievement (21 weeks) than early intensive contributions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2511.11153","ref_index":42,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"SCULPT: An Interactive Machine Learning Platform for Analyzing Multi-Particle Coincidence Data from Cold Target Recoil Ion Momentum Spectroscopy","primary_cat":"physics.atm-clus","submitted_at":"2025-11-14T10:34:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"SCULPT is an interactive machine learning platform combining UMAP, clustering, and adaptive confidence scoring for analyzing COLTRIMS multi-particle coincidence data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2511.08156","ref_index":5,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"LandSegmenter: Towards a Flexible Foundation Model for Land Use and Land Cover Mapping","primary_cat":"cs.CV","submitted_at":"2025-11-11T12:08:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LandSegmenter creates a task-specific foundation model for LULC mapping using weak labels from existing products, an RS adapter, text encoder, and confidence-guided fusion to achieve competitive zero-shot performance across modalities and taxonomies.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2510.09276","ref_index":6,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"The bixplot: A variation on the boxplot suited for bimodal data","primary_cat":"stat.ME","submitted_at":"2025-10-10T11:19:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Presents the bixplot as an extension of the boxplot incorporating contiguous clustering to visualize bimodality and multimodality while displaying individual data points, with Python and R implementations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2509.20237","ref_index":49,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Investigating the Representation of Backchannels and Fillers in Fine-tuned Language Models","primary_cat":"cs.CL","submitted_at":"2025-09-24T15:27:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Fine-tuning on annotated English and Japanese dialogues improves clustering of backchannels and fillers and makes generated utterances closer to human ones.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2506.14103","ref_index":12,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A Robust Nonparametric Framework for Detecting Repeated Spatial Patterns","primary_cat":"stat.ME","submitted_at":"2025-06-17T01:38:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A nonparametric framework detects repeated spatial patterns via constrained clustering followed by MMD-based reassignment and block permutation under stationarity and mixing conditions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2408.15701","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Robust discriminant analysis","primary_cat":"stat.ME","submitted_at":"2024-08-28T10:59:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A review paper that identifies the outlier sensitivity of classical discriminant analysis and summarizes robust versions based on resistant location and scatter estimators plus diagnostic graphics.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}