{"total":23,"items":[{"citing_arxiv_id":"2606.30886","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Multipolar Magnetic-Field Inference for PSR J0740+6620 with Neural-Network-Accelerated NICER Pulse-Profile Modeling","primary_cat":"astro-ph.HE","submitted_at":"2026-06-29T20:22:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Neural-network surrogate accelerated MCMC infers multipolar magnetic field parameters for PSR J0740+6620 from NICER data, finding broad multimodal posteriors and disfavoring a zero-offset model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.30388","ref_index":30,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A Stochastic--Geometric Theory of Scaling Laws in Grokking","primary_cat":"stat.ML","submitted_at":"2026-06-29T14:43:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A stochastic-geometric model of solution-space topology under Adam derives explicit scaling laws for grokking transition time as a function of learning rate, batch size, and L2 coefficient.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.12610","ref_index":14,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"The Mathematics of AI Winters: The mathematical Taxonomy of Paradigm Fragility in AI Winter","primary_cat":"cs.LG","submitted_at":"2026-06-10T19:08:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Established mathematical bottlenecks in representation, optimization, complexity, and high-dimensional learning aligned with the central disappointments of early AI research 
periods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.09953","ref_index":31,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Deep Slice Interpolation for Reducing Through-Plane Anisotropy and Noise in Head CT","primary_cat":"eess.IV","submitted_at":"2026-06-08T10:45:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Deep learning system synthesizes intermediate head CT slices to halve through-plane anisotropy while providing implicit denoising, outperforming baselines on structural metrics.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03935","ref_index":50,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Quadratic integrate-and-fire neurons exhibit less fragmented loss landscapes and outperform leaky integrate-and-fire neurons in spike-based gradient descent","primary_cat":"cs.NE","submitted_at":"2026-06-02T17:26:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"QIF neurons outperform LIF neurons in spike-based gradient descent training of spiking neural networks by avoiding discontinuities that fragment the loss landscape.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29428","ref_index":33,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"DELOS: Detecting Shallow Transits in Kepler Photometry Using a Contrastive-Learning Framework","primary_cat":"astro-ph.EP","submitted_at":"2026-05-28T06:22:22+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DELOS applies contrastive learning to phase-folded light curves to detect shallow intermediate-to-long 
period transits, reporting 15.5% and 11.25% gains in combined precision-recall over BLS and TLS in low-SNR tests plus 3-80x speedups.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.24545","ref_index":19,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Rethinking Federated Unlearning via the Lens of Memorization","primary_cat":"cs.LG","submitted_at":"2026-05-23T12:25:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Introduces Grouped Memorization Evaluation and FedMemPrune to remove unique memorized information in federated unlearning while preserving overlapping knowledge.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23708","ref_index":270,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Learning Dynamic Stability Landscapes in Synchronization Networks","primary_cat":"cs.LG","submitted_at":"2026-05-22T14:55:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces graph-to-image prediction of per-node dynamic stability landscapes in oscillator networks from topology, releases two 10k-graph datasets, and shows GNN-CNN models achieve good accuracy with cross-size generalization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19579","ref_index":20,"ref_count":2,"confidence":0.88,"is_internal_anchor":false,"paper_title":"TACK: A Statistical Evaluation of Degradation Activity on a Novel TArgeting Chimeras Knowledge Dataset","primary_cat":"q-bio.QM","submitted_at":"2026-05-19T09:22:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A new aggregated PROTAC dataset shows 
potency is more predictable than maximum degradation by ML, with classical methods outperforming a specialized graph neural network.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17885","ref_index":5,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Multi-agent AI systems outperform human teams in creativity","primary_cat":"cs.CL","submitted_at":"2026-05-18T05:52:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Multi-agent LLM teams outperform human teams in creativity (d=1.50) across tasks by producing more novel ideas, with distinct semantic exploration patterns predicting success for each group.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12619","ref_index":76,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Human face perception reflects inverse-generative and naturalistic discriminative objectives","primary_cat":"q-bio.NC","submitted_at":"2026-05-12T18:06:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Human face perception aligns with neural networks trained on inverse-generative and naturalistic discriminative tasks, as these best predict human dissimilarity judgments on controversial and random face pairs.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"face-selective cortex [74,75] to ask whether the objectives that best explain perceptual judgments also best explain cortical representations. 4 Methods 4.1 Model training All candidate models were built upon the VGG-16 architecture [42] (torchvision implementation) without batch normalization, operating on 128×128-pixel RGB inputs. 
Model weights were initialized using Kaiming normal initialization [76]. For each training dataset, input images were standardized using per-channel statistics computed from that dataset, and the same normalization was applied during stimulus optimization and model-behavior analyses. Models were trained using PyTorch Lightning [77], with dropout, where present, enabled only during training. 4.1.1 faceID-BFM The faceID-BFM model was trained as a supervised face-identification model on a synthetic BFM-identity dataset."},{"citing_arxiv_id":"2605.07648","ref_index":12,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Learning Large-Scale Modular Addition with an Auxiliary Modulus","primary_cat":"cs.LG","submitted_at":"2026-05-08T12:16:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"An auxiliary modulus during training reduces wrap-around issues and preserves train-test input distributions, enabling better accuracy and sample efficiency for large N and q in modular addition learning.","context_count":1,"top_context_role":"other","top_context_polarity":"unclear","context_text":"[11] Michael Hahn and Mark Rofin. Why are sensitive functions hard for transformers? In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 14973-15008, Bangkok, Thailand, August 2024. Association for Computational Linguistics. doi: 10.18653/v1/2024.acl-long.800. URL https://aclanthology.org/2024.acl-long.800/. [12] Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV), page 1026-1034, 2015. URL https://doi.org/10.1109/ICCV.2015.123. 
[13] Hiroshi Kera, Yuki Ishihara, Yuta Kambe, Tristan Vaccon, and Kazuhiro Yokoyama."},{"citing_arxiv_id":"2605.03710","ref_index":25,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Amortized Variational Inference for Joint Posterior and Predictive Distributions in Bayesian Uncertainty Quantification","primary_cat":"stat.ML","submitted_at":"2026-05-05T12:56:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"An amortized variational framework jointly targets the posterior and posterior-predictive distributions via a KL upper bound and moment regularization, yielding more accurate predictions at lower online cost than two-stage variational inference.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02310","ref_index":77,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A Variational Kolosov--Muskhelishvili Network for Elasticity and Fracture","primary_cat":"cs.CE","submitted_at":"2026-05-04T08:02:31+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"evaluating the higher-order derivatives entering the governing equations. Despite the above advantages, the exponential nonlinearity may lead to rapid activation growth and gradient explosion. To stabilize training at early iterations, an exponential-aware initialization method is adopted to perform the layerwise scaling together with a complex-valued variant of He's initialization [77]. For each complex linear layer $L_\\ell : \\mathbb{C}^{n^{(\\ell)}_{\\mathrm{in}}} \\to \\mathbb{C}^{n^{(\\ell)}_{\\mathrm{out}}}$, the real and imaginary parts of the weights are independently initialized as given by $$\\Re W^{(\\ell)}_{ij},\\ \\Im W^{(\\ell)}_{ij} \\sim \\mathcal{G}\\left(0, \\frac{\\rho_\\ell}{2 n^{(\\ell)}_{\\mathrm{in}}}\\right), \\qquad b^{(\\ell)} = 0, \\tag{23}$$ where $\\rho_\\ell > 0$ is a layerwise variance factor. 
Given a global parameter $\\beta > 0$ and a prescribed number $M_e$ of pre-stabilizing layers, $\\rho_\\ell$ is chosen by the data-dependent rule,"},{"citing_arxiv_id":"2604.27818","ref_index":18,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"MASCing: Configurable Mixture-of-Experts Behavior via Activation Steering Masks","primary_cat":"cs.CR","submitted_at":"2026-04-30T12:58:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MASCing uses an LSTM surrogate and optimized steering masks to enable flexible, inference-time control over MoE expert routing for safety objectives, improving jailbreak defense and content generation success rates substantially across multiple models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22034","ref_index":7,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"LTBs-KAN: Linear-Time B-splines Kolmogorov-Arnold Networks","primary_cat":"cs.LG","submitted_at":"2026-04-23T19:47:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LTBs-KAN delivers linear-time B-spline evaluation in KANs plus parameter reduction via product-of-sums factorization, with competitive results on MNIST, Fashion-MNIST, and CIFAR-10.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"activation function that approximates 3rd-order B-splines used in the original KAN. In addition, it replaces the incompatible L1 regularization applied to input samples with the L1 penalty on the model weights. Finally, it introduces arXiv:2604.22034v1 [cs.LG] 23 Apr 2026 A PREPRINT - APRIL 27, 2026 learnable scaling factors for activation functions and adopting Kaiming uniform initialization [7] for both the base weight and spline scaling matrices. 
Following this initial attempt, a combination of B-splines and Radial Basis Functions (BSRBF-KAN) is proposed [3] to fit input vectors during data training. For this, an RBF network with N centers is used to group samples to a specific basis. Thus, by applying linear transformations, it is possible to align a series of 3rd-order B-spline bases to Gaussian"},{"citing_arxiv_id":"2604.17772","ref_index":28,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A Deep Ritz Method for High-Dimensional Steady States of the Cahn-Hilliard Equation","primary_cat":"math.NA","submitted_at":"2026-04-20T03:52:32+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.13835","ref_index":35,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A Resource-Efficient Hybrid CNN-LSTM network for image-based bean leaf disease classification","primary_cat":"cs.CV","submitted_at":"2026-04-15T13:09:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A lightweight hybrid CNN-LSTM network classifies bean leaf diseases at 94.38% accuracy and 1.86 MB size on the ibean dataset, with reported state-of-the-art F1 scores using EfficientNet-B7+LSTM.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"[33] Small filters are selected for computational efficiency, and padding parameters are used to minimise the boundary effects. We implemented either the stride parameter or max-pool to reduce the input resolution and improve feature extraction [34]. All convolutional layers employ ReLU combined with the Kaiming (He) initialisation to increase non-linearity and ensure more reliable convergence [35]. 
To effectively leverage the LSTM's capability for spatial correlation, we transform the 2D feature maps generated by the final convolutional layer into a structured 1D temporal sequence. Instead of a standard flattening operation, which collapses the spatial hierarchy, the feature maps are reshaped into consecutive time-steps that represent a scan-line or patch-"},{"citing_arxiv_id":"2604.13503","ref_index":26,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Enhancing Event Reconstruction in Hyper-Kamiokande with Machine Learning: A ResNet Implementation","primary_cat":"hep-ex","submitted_at":"2026-04-15T05:41:21+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":5.0,"formal_verification":"none","one_line_summary":"ResNet models classify four particle types and regress vertex, direction, and momentum in Hyper-Kamiokande with resolutions matching likelihood methods but at 30,000-50,000x faster inference on GPU.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"Input images pass through a standard initial 7 × 7 convolution and a 3 × 3 max-pool, followed by four stages of residual blocks with channel depths {64, 128, 256, 512}. An adaptive-average-pooling layer reduces the spatial dimensions, after which a fully connected layer projects to the required output dimension. Kaiming-normal weight initialization [26] and zero-initialized residual-branch batch norms [27] are applied. ResNet-50 networks were also evaluated but yielded inferior reconstruction performance relative to ResNet-152, despite offering faster training and inference. 
Since our primary objective was to demonstrate the best achievable reconstruction accuracy for the Hyper-Kamiokande far detector, we report"},{"citing_arxiv_id":"2604.06291","ref_index":9,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"TalkLoRA: Communication-Aware Mixture of Low-Rank Adaptation for Large Language Models","primary_cat":"cs.LG","submitted_at":"2026-04-07T14:57:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TalkLoRA equips MoE-LoRA experts with a communication module that smooths routing dynamics and improves performance on language tasks under similar parameter budgets.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"guage processing task solver? In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 1339-1384, Singapore. Association for Computational Linguistics. Keisuke Sakaguchi, Ronan Le Bras, Chandra Bhagavatula, and Yejin Choi. 2021. Winogrande: an adversarial winograd schema challenge at scale. Commun. ACM, 64(9):99-106. Maarten Sap, Hannah Rashkin, Derek Chen, Ronan Le Bras, and Yejin Choi. 2019. Social IQa: Commonsense reasoning about social interactions. 
In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Lan- guage Processing (EMNLP-IJCNLP), pages 4463- 4473, Hong Kong, China."},{"citing_arxiv_id":"2512.06427","ref_index":2,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"A new initialisation to Control Gradients in Sinusoidal Neural network","primary_cat":"cs.LG","submitted_at":"2025-12-06T13:23:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A closed-form initialization for SIREN networks based on pre-activation fixed points and Jacobian variance sequences improves gradient scaling, training dynamics via NTK, and generalization on reconstruction tasks over the original scheme.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2412.20091","ref_index":48,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Gamma-Ray Burst Light Curve Reconstruction: A Comparative Machine and Deep Learning Analysis","primary_cat":"astro-ph.HE","submitted_at":"2024-12-28T09:20:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MLP and Attention U-Net outperform other models in reconstructing GRB light curves on 521 events, cutting plateau parameter uncertainties by 37-41% versus the Willingale baseline while achieving low MSE.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2403.16958","ref_index":55,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"TwinLiteNet+: An Enhanced Multi-Task Segmentation Model for Autonomous 
Driving","primary_cat":"cs.CV","submitted_at":"2024-03-25T17:17:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"TwinLiteNet+ is a hybrid-encoder multi-task segmentation model with new UCB, USB, and PCAA modules that reports 92.9% mIoU on drivable area and 34.2% IoU on lane segmentation on BDD100K while using 11x fewer FLOPs than prior models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}