{"total":51,"items":[{"citing_arxiv_id":"2606.30460","ref_index":32,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"HSAP: A Hierachical Sequence-aware Parallelism for Hybrid-Context Generative Models","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:26:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"HSAP proposes a sequence-aware parallelism algorithm with JIT-optimized NCCL communication, integrated into a hierarchical framework that combines existing paradigms to support correct causal attention on hybrid-context packed sequences.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.30442","ref_index":29,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"The FIL Hypothesis: Inductive Biases Help with Kernel Engineering","primary_cat":"cs.AI","submitted_at":"2026-06-29T15:16:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"The FIL Hypothesis claims that inductive biases outperform purely data-driven methods on GPU programming tasks with non-trivial feedback loops.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.28122","ref_index":21,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Higher-Order Fourier Neural Operator: Explicit Mode Mixer for Nonlinear PDEs","primary_cat":"cs.CE","submitted_at":"2026-06-26T14:22:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"HO-FNO extends standard FNO with n-linear spectral mixing and shows improved accuracy on nonlinear PDE benchmarks, sometimes with a single layer beating deeper FNO models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.26873","ref_index":2,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Scalable Message-Passing Quantum Graph Neural Networks in the Weisfeiler-Leman Hierarchy","primary_cat":"quant-ph","submitted_at":"2026-06-25T10:58:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The work constructs a permutation-equivariant quantum GNN that implements message passing at selectable Weisfeiler-Leman levels, supports pre-training on small graphs, and demonstrates readout scalability with simulations up to 56 qubits on synthetic, molecular, and TSP datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.23874","ref_index":10,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Identifying structural design principles shaping the computational abilities of recurrent neural networks","primary_cat":"q-bio.NC","submitted_at":"2026-06-22T19:17:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Local 2- and 3-cycles enhance RNN computational capacity for Boolean functions, predicted by structural statistics, while adding interneurons boosts large networks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.23745","ref_index":19,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"JEDEL: Zero-Shot DNA-Encoded Library Design for Early-Stage Drug Discovery","primary_cat":"q-bio.BM","submitted_at":"2026-06-21T19:27:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"JEDEL maps pharmacophore patterns to scalable combinatorial synthesis routes for DNA-encoded libraries, producing focused libraries that outperform baselines on 18 targets in zero-shot mode.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.22309","ref_index":21,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"The $\\alpha$-Index: A Penalized Authorship-Integrity Framework for Position-Weighted Scientific Contribution","primary_cat":"cs.DL","submitted_at":"2026-06-21T02:32:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The α-index is a conserved position-weighted authorship framework with a senior-author penalty that decreases credit as the number of middle authors increases.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.22167","ref_index":2,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Early-Exit Graph Neural Networks for Link Prediction","primary_cat":"cs.LG","submitted_at":"2026-06-20T17:56:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Early-exit GNNs for link prediction move the speed-quality Pareto frontier on the HeaRT benchmark by allowing implicit early exiting without auxiliary losses.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18648","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Deep Research in Physical Sciences: A Multi-Agent Framework and Comprehensive Benchmark","primary_cat":"physics.comp-ph","submitted_at":"2026-06-17T03:32:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"PhySciBench benchmark shows current AI models achieve at most 33.5% accuracy on physical science tasks; DelveAgent framework improves accuracy by up to 7.5 points and cuts costs to one-third.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18058","ref_index":116,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Multiscale reconstruction of protein conformations from cryo-EM images","primary_cat":"eess.IV","submitted_at":"2026-06-16T15:35:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A multiscale optimization method using explicit protein backbone geometry reconstructs atomic models from cryo-EM data, showing improved RMSD and TM scores on three simulated datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.17127","ref_index":44,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Agentic Discovery of Non-Canonical Antimicrobial Peptides with AMPGAN v3","primary_cat":"q-bio.QM","submitted_at":"2026-06-15T16:39:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AMPGAN v3 generates non-canonical AMPs with D-amino acids and modifications using two discriminators for stability, validated with two active candidates in vitro, alongside the PepCraft multi-agent discovery framework.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11057","ref_index":2,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Flexible Kernels for Protein Property Prediction","primary_cat":"cs.LG","submitted_at":"2026-06-09T16:20:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"New class of sequence kernels for Gaussian processes that use substitution matrices and local linearity to enable data-efficient prediction of protein properties, with extensions to structure-aware multi-task learning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.10415","ref_index":10,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"RATrain: A Resource-Aware Training Runtime for Large Language Models on Bandwidth-Constrained Heterogeneous Supercomputing Platforms","primary_cat":"cs.DC","submitted_at":"2026-06-09T04:42:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"RATrain introduces a resource-aware scheduler and MT-3000-specific backend for 1F1B LLM training that achieves 1.35x speedup and 97% scaling efficiency while preserving training correctness.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07865","ref_index":5,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Instrumented data for causal scientific machine learning","primary_cat":"cs.LG","submitted_at":"2026-06-05T21:53:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Instrumented data augments observations with mechanistic models, uncertainty, and counterfactuals to enable causal interventions via Pearl's do-operator in scientific machine learning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07850","ref_index":8,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"PDE-Agents: An LLM-Orchestrated Multi-Agent Framework for Automated Finite Element Simulations with Knowledge Graph-Augmented Reasoning","primary_cat":"physics.comp-ph","submitted_at":"2026-06-05T21:21:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PDE-Agents shows a LangGraph-orchestrated multi-agent LLM framework with GraphRAG that reaches 100% task success and perfect material fidelity on novel materials in ablation tests, with 97.8% success across 1369 production runs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.05541","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Methods for Inferring Interaction Potentials from Cross-Linking Mass Spectrometry Data","primary_cat":"physics.chem-ph","submitted_at":"2026-06-04T00:52:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Develops and tests algorithms adapting inverse Henderson problem solvers to parameterize multi-component interaction potentials from XL-MS data in homogeneous and three-phase systems.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04452","ref_index":18,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"DeltaDiff: Training-Free, Physics-Guided Machine Learning for Predicting Mutant Protein Structures","primary_cat":"physics.chem-ph","submitted_at":"2026-06-03T04:55:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DeltaDiff is a physics-guided inference method that predicts mutant protein structures from a baseline diffusion model without retraining, tested on three systems with nonlocal changes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03014","ref_index":79,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"MOSAIC: Efficient Mixture-of-Agent Scheduling via Adaptive Aggregation and Inference Concurrency","primary_cat":"cs.LG","submitted_at":"2026-06-02T01:40:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MOSAIC uses an Integer Linear Program scheduler for expert placement and prompt assignment plus adaptive aggregation to achieve 1.7-2.3x end-to-end speedup on 4-GPU MoA workloads while keeping accuracy within 0.1pp.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02419","ref_index":94,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"DPA4: Pushing the Accuracy-Cost Frontier of Interatomic Potentials with EMFA SO(2) Convolution","primary_cat":"physics.chem-ph","submitted_at":"2026-06-01T15:59:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DPA4 is a new SE(3)-equivariant interatomic potential with EMFA SO(2) convolution that sets new accuracy-cost records on Matbench Discovery and SPICE benchmarks using fewer parameters than prior models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27873","ref_index":6,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AIBuildAI-2: A Knowledge-Enhanced Agent for Automatically Building AI Models","primary_cat":"cs.AI","submitted_at":"2026-05-27T02:44:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"AIBuildAI-2 introduces a knowledge-enhanced agent with a hierarchical evolving external knowledge base that dynamically loads relevant AI development expertise, achieving first place on MLE-Bench at 70.7% medal rate.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.25055","ref_index":17,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Building Digital Societies as Ecosystems: How Recognition and Repeat Relationships Sustain Cross-Community Work in Open Source","primary_cat":"cs.CY","submitted_at":"2026-05-24T13:04:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"MODERATE","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Cross-boundary collaboration in open source is sustained by a thin carrier layer of contributors and repeat relationships that increase pull request acceptance rates from 42% to 87%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21083","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AIMBio-Mat: An AI-Native FAIR Platform for Closed-Loop Materials Discovery and Biomedical Translation","primary_cat":"physics.app-ph","submitted_at":"2026-05-20T12:18:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"AIMBio-Mat is a conceptual blueprint for an AI-native, FAIR, governance-aware decision layer that formulates biomedical-materials discovery as constrained multi-objective optimization under uncertainty.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21070","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Towards Understanding Self-Pretraining for Sequence Classification","primary_cat":"cs.LG","submitted_at":"2026-05-20T11:56:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Self-pretraining improves Transformer sequence classification by enabling learning of proximity-biased attention from positional encodings that label supervision alone cannot easily acquire from random starts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20547","ref_index":18,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Latent Process Generator Matching","primary_cat":"cs.LG","submitted_at":"2026-05-19T22:49:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Presents a general framework for generator matching on projected image spaces from latent Markov processes, generalizing static latent results to dynamic conditional processes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16126","ref_index":19,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Entropy Across the Bridge: Conditional-Marginal Discretization for Flow and Schr\\\"odinger Samplers","primary_cat":"cs.LG","submitted_at":"2026-05-15T16:11:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Derives a conditional-marginal entropy-rate objective for bridge-aware discretization that yields U-shaped schedules and improves low-NFE sample quality on 2D, CIFAR-10, and protein tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15564","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"CrystalBoltz: End-to-End Protein Structure Determination via Experiment-Guided Diffusion for X-Ray Crystallography","primary_cat":"cs.LG","submitted_at":"2026-05-15T03:11:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CrystalBoltz performs experiment-guided posterior sampling with diffusion models on structure-factor amplitudes for protein structure determination, reporting lower RMSD and R-factors than baselines with 33x faster runtime.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13789","ref_index":7,"ref_count":2,"confidence":0.88,"is_internal_anchor":false,"paper_title":"ENSEMBITS: an alphabet of protein conformational ensembles","primary_cat":"cs.LG","submitted_at":"2026-05-13T17:08:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"Ensembits is the first tokenizer of protein conformational ensembles that outperforms static tokenizers on RMSF prediction and matches them on function and mutation tasks while using less pretraining data.","context_count":1,"top_context_role":"other","top_context_polarity":"unclear","context_text":"anchor the interpretation: if all tokens have identical mean s1 then η2 = 0 (token id is uninformative about motion amplitude); if every residue with the same token has the same s1 then η2 = 1 (token id deterministically predicts amplitude). F-statistic and parametric p-value.The classical inferential question - is η2 distinguishable from zero? - is answered by theF-statistic F= SSbetween/(M−1) SSwithin/(N−M) ∼F(M−1, N−M)underH 0,(7) where H0 asserts that all tokens have the same true mean. UnderH0, both numerator and denominator estimate the same residual variance and one expects F≈1 . Values much larger than 1 indicate that the between-token variance exceeds what residual scatter alone could produce. Permutation null: a distribution-free check.The F -test assumes within-group normality and"},{"citing_arxiv_id":"2605.15219","ref_index":6,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"NOVA: Fundamental Limits of Knowledge Discovery Through AI","primary_cat":"cs.AI","submitted_at":"2026-05-12T21:37:09+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11111","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"ShardTensor: Domain Parallelism for Scientific Machine Learning","primary_cat":"cs.DC","submitted_at":"2026-05-11T18:20:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ShardTensor is a domain-parallelism system for SciML that enables flexible scaling of extreme-resolution spatial datasets by removing the constraint of batch size one per device.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"vehicle for accelerated simulation, scientific discovery, and industrial design. Machine learning has found applications in an incredible breadth of domains: healthcare and medicine [1], [2], industrial design [3]-[5], fluid dynamics [6] and aerodynamics [7], weather and climate forecasting [8], [9], fundamental sciences [10]-[12], and many, many more [13]- [15]. It is not an overstatement to say that machine learning methods are fundamentally changing scientific research, all the way from early development to end user and industrial applications. Scientific data has several attributes that make it especially challenging to use for both training and inference, leading to reduced adoption or degraded applications of these scientific"},{"citing_arxiv_id":"2605.09834","ref_index":10,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Supercharging Bayesian Inference with Reliable AI-Informed Priors","primary_cat":"stat.ML","submitted_at":"2026-05-11T00:21:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Rectified AI priors, obtained by correcting AI-induced data laws before embedding them in techniques like Dirichlet process priors, reduce bias, improve credible interval coverage, and boost performance in tasks like skin disease classification.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"5) 560 (76) 170 (2.2) 0.62 (0.049) Split -6.6 (9.5) 420 (54) 250 (3.1) 0.80 (0.040) NPB -6.6 (9.5) 480 (62) 210 (3.3) 0.74 (0.044) Isotonic Fixed -46 (7.2) 460 (59) 170 (2.2) 0.65 (0.048) Split -45 (7.2) 360 (31) 280 (3.7) 0.90 (0.030) NPB -45 (7.2) 410 (52) 200 (2.5) 0.74 (0.044) Quantile map Fixed 2.8 (7.5) 320 (36) 210 (2.7) 0.86 (0.035) Split 0.058 (7.4) 340 (10) 330 (3.9) 0.98 (0.014) NPB 6.5 (7.4) 310 (30) 240 (3.4) 0.89 (0.031) Table 1:Evaluation of rectifiers and calibration sample construction strategies on the gene expression and age- income regression examples over 100 repetitions. All combinations of rectifiers and strategies are generally able to meaningfully reduce the centering bias to a negligible fraction of that of the raw AI prior."},{"citing_arxiv_id":"2605.09495","ref_index":145,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Enabling Structure-Only Initialization and Out-of-Distribution Generalization in GNN-based Molecular Dynamics Simulators","primary_cat":"physics.chem-ph","submitted_at":"2026-05-10T12:00:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"GNN-based MD simulators achieve stable structure-only initialization and reliable OOD generalization through inference-time physics optimization and a GNN barostat on elastic network compression tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08934","ref_index":84,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"From Mechanistic to Compositional Interpretability","primary_cat":"cs.LG","submitted_at":"2026-05-09T13:08:07+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07554","ref_index":9,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"ProteinJEPA: Latent prediction complements protein language models","primary_cat":"cs.LG","submitted_at":"2026-05-08T10:30:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Masked-position MLM plus JEPA latent prediction outperforms MLM-only pretraining on 10-11 of 16 downstream tasks for 35M-150M protein models while JEPA alone fails.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Rose Orenbuch, Yarin Gal, and Debora S. Marks. ProteinGym: Large-Scale Benchmarks for Pro- tein Design and Fitness Prediction.bioRxiv, page 2023.12.07.570727, December 2023. ISSN 2692- 8205. doi: 10.1101/2023.12.07.570727. URL https://pmc.ncbi.nlm.nih.gov/articles/ PMC10723403/. Dan Ofer and Michal Linial. Protein Language Models Expose Viral Immune Mimicry.Viruses, 17 (9), August 2025. ISSN 1999-4915. doi: 10.3390/v17091199. URL https://www.mdpi.com/ 1999-4915/17/9/1199. Dan Ofer, Nadav Brandes, and Michal Linial. The language of proteins: NLP, machine learning & protein sequences.Computational and Structural Biotechnology Journal, 19:1750-1758, 2021. ISSN 20010370. doi: 10.1016/j.csbj.2021.03.022. URL https://linkinghub."},{"citing_arxiv_id":"2605.04375","ref_index":60,"ref_count":2,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Experiment-as-Code Labs: A Declarative Stack for AI-Driven Scientific Discovery","primary_cat":"eess.SY","submitted_at":"2026-05-06T00:50:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"The paper introduces Experiment-as-Code Labs as a declarative stack synthesizing AI agents, systems orchestration, and physical lab control for AI-driven discovery.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04307","ref_index":16,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A physics-informed neural network approach to solve the spatially inhomogeneous electron Boltzmann equation","primary_cat":"physics.plasm-ph","submitted_at":"2026-05-05T21:14:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A specialized PINN architecture solves the spatially inhomogeneous electron Boltzmann equation with high accuracy across gases and electric field strengths without case-specific tuning.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"by the scaling quantities zs and Us, respectively (specified in Table III). The relationship between the physical approx- imation fθ (z,U) and the network output uθ (˜z, ˜U) is given by A physics-informed neural network approach to solve the spatially inhomogeneous electron Boltzmann equation 7 0 5 10 15 20 25 30 35 40 45 10□3 10□2 10□1 100 101 Q(U ) [10 □16 cm2] Neon Qel Qin ex1 Qin ex2 Qin io 0 5 10 15 20 25 30 10□3 10□2 10□1 100 101 102 Argon 0 5 10 15 20 25 30 U [eV] 10□3 10□2 10□1 100 101 102 Q(U ) [10 □16 cm2] Krypton 0 5 10 15 20 25 30 U [eV] 10□3 10□2 10□1 100 101 102 Xenon FIG. 3: Electron collision cross-section data for neon, argon, krypton, and xenon. fθ (z,U) = fs · uθ (˜z, ˜U), where fs is a characteristic scaling"},{"citing_arxiv_id":"2605.04265","ref_index":155,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Benchmarking open-source tools for in silico antiviral drug discovery","primary_cat":"q-bio.BM","submitted_at":"2026-05-05T19:59:39+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Boltz-2 and fine-tuned DrugFormDTA lead ML-based binding prediction while GNINA leads docking tools on a cleaned antiviral dataset, with performance varying by viral protein.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"structures close to the resolution typically achieved in X-ray crystallography experiments (ie about 1 Å RMSD), at least for a subset of proteins. While AlphaFold2 was a step-change advance, it is important to emphasize that AlphaFold2 is still inaccurate for some classes of proteins and fails completely when it comes to disordered proteins. AlphaFold2 was followed by AlphaFold3 in May 2024.[155] In addition to improving protein prediction, AlphaFold3 extended the AlphaFold2 system to enable the prediction of biomolecular complexes. On the PoseBusters v1 dataset, containing 428 protein-ligand complexes, AlphaFold3 21 outperformed AutoDock Vina and RoseTTAFold All-Atom, two popular docking methods (the task was the prediction of protein-ligand structure, and prediction was deemed successful if the \"pocket-"},{"citing_arxiv_id":"2605.03045","ref_index":150,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"TCD-Arena: Assessing Robustness of Time Series Causal Discovery Methods Against Assumption Violations","primary_cat":"cs.LG","submitted_at":"2026-05-04T18:12:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"TCD-Arena is a new customizable testing framework that runs millions of experiments to map how 33 different assumption violations affect time series causal discovery methods and shows ensembles can boost overall robustness.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02014","ref_index":141,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"MIRA: A Score for Conditional Distribution Accuracy and Model Comparison","primary_cat":"stat.ML","submitted_at":"2026-05-03T18:41:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MIRA is a new analytic score for conditional distribution accuracy derived from equal probability mass assignment, enabling Bayesian model comparison via direct posterior validation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20629","ref_index":84,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Rates of forgetting for the sequentially Markov coalescent","primary_cat":"math.PR","submitted_at":"2026-04-22T14:44:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SMC forgets its initial condition geometrically in the jump chain and as 1/ℓ in continuous genetic distance, justifying independent-locus approximations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19961","ref_index":43,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"The Research Guide: From Informal Role to Profession","primary_cat":"physics.ed-ph","submitted_at":"2026-04-21T20:15:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"The authors argue that guiding non-PhD learners through authentic research requires a dedicated profession with its own training, career structure, and recognition because existing models and programs fall short.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"(AlphaFold [40]), theory construction through symbolic regression [ 41], and hybrid physics-ML models. The most philosophically significant development is opaque prediction: deep learning systems that predict outcomes, such as plasma disruptions in fusion reactors [ 42], without modeling the underlying physics. Whether such prediction-without-explanation constitutes scientific understanding is actively debated [43, 44]. For the Research Guide this debate is practical: in every project where a student reaches for machine learning, is the black-box prediction sufficient, or must the student seek to understandwhy? Helping students navigate that judgment is a new competency the profession requires. These modes are not a hierarchy or stages of a single method; they are parallel contributions."},{"citing_arxiv_id":"2604.18603","ref_index":4,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Dual Triangle Attention: Effective Bidirectional Attention Without Positional Embeddings","primary_cat":"q-bio.QM","submitted_at":"2026-04-09T19:32:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Dual Triangle Attention achieves effective bidirectional attention with built-in positional inductive bias via dual triangular masks, outperforming standard bidirectional attention on position-sensitive tasks and showing strong masked language modeling results with or without positional embeddings.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"For nucleotide modeling, where functional and structural dependencies can span large genomic distances, DTA's context extension capability may complement existing long-range approaches. Future work should focus on MLM-specific variants of position dropping, potentially enabling robust long-context extension in bidirectional settings without full long-context pretraining. Methods Data sources Argmax position probe.Synthetic sequences were generated by sampling integers uniformly from[0,v)wherev= 64is the vocabulary size. Labels were the 0-indexed position of the first occurrence of the maximum value. Sequence length was fixed atl= 64. Batches of 1,024 sequences were generated on-the-fly during training; evaluation used 16 batches of 1,024 sequences each. Natural language.We used FineWeb-Edu (45), a large-scale filtered web corpus designed for language model pretraining. Text was tokenized using a custom Byte-Pair Encoding (BPE) tokenizer (51) with a vocabulary of 4,096 tokens, chosen to reduce vocabulary size relative to standard tokenizers while preserving reasonable subword granularity. Training sequences were truncated or padded to 256 tokens. Validation and test sets were constructed by filtering documents with at least 1,024 tokens, then splitting the remaining documents into 1,000 documents each for validation and testing. Training data was streamed and filtered to exclude validation and test documents. Halleeet al.| arXiv | April 22, 2026 | 5-12 Fig. 5.DroPE recovery analysis. (a) NLP extended-context validation loss, accuracy, MCC, and F1 before and after dropping positional embeddings at 70% of training. (b) Protein extended-context validation loss, accuracy, MCC, and F1. The vertical dashed line marks the drop point. Shaded regions represent±1 standard deviation across three seeds. (c) NLP final test loss, accuracy, MCC, and F1 comparing RoPE (kept throughout) vs. RoPE-off (dropped at 70%). (d) Protein final test loss, accuracy, MCC, and F1. Signi"},{"citing_arxiv_id":"2604.04736","ref_index":20,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Sampling Parallelism for Fast and Efficient Bayesian Learning","primary_cat":"cs.LG","submitted_at":"2026-04-06T15:03:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Sampling parallelism distributes Bayesian sample evaluations across GPUs for near-perfect scaling, lower memory use, and faster convergence via per-GPU data augmentations, outperforming pure data parallelism in diversity.","context_count":1,"top_context_role":"background","top_context_polarity":"support","context_text":"approaches for neural networks and can aid in pushing scalability limits while even improving model convergence. arXiv:2604.04736v1 [cs.LG] 6 Apr 2026 Özdemir et al. 2 Background Uncertainty in neural networks can have two sources: the inherent randomness of the world (aleatoric, data driven), and the lack of knowledge (epistemic, model driven) [20], both of which need to be quantified to navigate risk-sensitive applications of machine learning. There are a variety of methods that enable UQ, however, they oftentimes require sampling which can be computationally expensive as well as memory intensive [10, 24, 28, 31]. In this pa- per, we discuss Bayesian neural networks (BNNs) with mean-field variational inference, and Monte Carlo dropout (MCD) as exam-"},{"citing_arxiv_id":"2601.01253","ref_index":32,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Stochastic Thermodynamics of Associative Memory","primary_cat":"cond-mat.stat-mech","submitted_at":"2026-01-03T18:25:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DenseAMs show tradeoffs between entropy production, retrieval accuracy, and speed at intermediate loads, with a new failure mode in higher-order networks at finite temperature.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2511.02043","ref_index":12,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Flashlight: PyTorch Compiler Extensions to Accelerate Attention Variants","primary_cat":"cs.LG","submitted_at":"2025-11-03T20:25:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Flashlight is a compiler-native PyTorch framework that generates efficient fused kernels for arbitrary and data-dependent attention variants, supporting more cases than FlexAttention with competitive performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2510.11752","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Fast and Interpretable Protein Substructure Alignment via Optimal Transport","primary_cat":"q-bio.QM","submitted_at":"2025-10-12T10:47:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PLASMA applies regularized optimal transport with Sinkhorn iterations to produce fast, interpretable residue-level alignments and similarity scores between protein structures.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2510.07195","ref_index":12,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Accelerating Inference for Multilayer Neural Networks with Quantum Computers","primary_cat":"quant-ph","submitted_at":"2025-10-08T16:26:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Quantum circuits for coherent multilayer neural network inference achieve quadratic to polylogarithmic speedups over classical methods depending on quantum data access models for inputs and weights.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"applicability [8, 9, 10]. This progress is partly facilitated by advances in GPUs, which offer speed-ups for parallelizable operations such as matrix-vector arithmetic. However, as we approach the physical limits of Moore's law [11], the continuous upscaling of CPUs and GPUs may begin to plateau. Consequently, a natural question is whether quantum computing [12, 13, 14] and potential quantum processing units (QPUs) can offer further acceleration for deep learning. The field of quantum machine learning (QML) [15, 16, 17], investigates this possibility. QML can broadly be separated into two main paradigms: (1) quantum algorithms tailored to the structure of near-term quantum hardware [18] under assumptions of limited quantum resources, and (2) using"},{"citing_arxiv_id":"2506.13131","ref_index":47,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AlphaEvolve: A coding agent for scientific and algorithmic discovery","primary_cat":"cs.AI","submitted_at":"2025-06-16T06:37:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AlphaEvolve is an LLM-orchestrated evolutionary coding agent that discovered a 4x4 complex matrix multiplication algorithm using 48 scalar multiplications, the first improvement over Strassen's algorithm in 56 years, plus optimizations for Google data centers and hardware.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"AlphaEvolve: A coding agent for scientific and algorithmic discovery ⟨𝑚, 𝑛, 𝑝⟩ best known [reference] AlphaEvolve ⟨2, 4, 5⟩ 33 [42] 32 ⟨2, 4, 7⟩ 46 [93] 45 ⟨2, 4, 8⟩ 52 [93] 51 ⟨2, 5, 6⟩ 48 [93] 47 ⟨3, 3, 3⟩ 23 [52] 23 ⟨3, 4, 6⟩ 56 [48] 54 ⟨3, 4, 7⟩ 66 [91] 63 ⟨3, 4, 8⟩ 75 [91] 74 ⟨3, 5, 6⟩ 70 [48] 68 ⟨3, 5, 7⟩ 82 [91] 80 ⟨4, 4, 4⟩ 49 [95] 48 ⟨4, 4, 5⟩ 62 [47] 61 ⟨4, 4, 7⟩ 87 [93] 85 ⟨4, 4, 8⟩ 98 [95] 96 ⟨4, 5, 6⟩ 93 [48] 90 ⟨5, 5, 5⟩ 93 [72] 93 Table2 | Upper bounds on the rank of the tensor⟨𝑚, 𝑛, 𝑝⟩ representing the product of an𝑚×𝑛 matrix and an𝑛 × 𝑝 matrix, i.e. the number of scalar multiplications required to compute this matrix product. Beyond the examples shown here, for all parameters𝑚, 𝑛, 𝑝 ≤ 5, AlphaEvolve"},{"citing_arxiv_id":"2502.05909","ref_index":31,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Towards a Universal Foundation Model for Protein Dynamics: A Multi-Chain Tree-Structured Framework with Transformer Propagators","primary_cat":"physics.atom-ph","submitted_at":"2025-02-09T14:08:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Proposes TSCG hierarchical representation and Transformer propagator for universal coarse-grained protein MD with claimed 10k-20k times acceleration over all-atom MD while preserving statistical properties.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2410.19471","ref_index":19,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Improving Inverse Folding for Peptide Design with Diversity-regularized Direct Preference Optimization","primary_cat":"cs.LG","submitted_at":"2024-10-25T11:04:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Diversity-regularized DPO fine-tuning of ProteinMPNN improves structural similarity scores by at least 8% over base model and sequence diversity by up to 20% over standard DPO for peptide inverse folding on OpenFold structures.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2211.09085","ref_index":186,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Galactica: A Large Language Model for Science","primary_cat":"cs.CL","submitted_at":"2022-11-16T18:06:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Galactica, a science-specialized LLM, reports higher scores than GPT-3, Chinchilla, and PaLM on LaTeX knowledge, mathematical reasoning, and medical QA benchmarks while outperforming general models on BIG-bench.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}