{"total":16,"items":[{"citing_arxiv_id":"2607.00275","ref_index":14,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Entropy-Regularized Probabilistic Gates for Sparse Model Discovery in Scarce-Data Federated Learning","primary_cat":"cs.LG","submitted_at":"2026-06-30T23:51:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Entropy regularization of probabilistic gates improves test performance and sparsity recovery in scarce-data federated learning over Fed-IHT and FedAvg pruning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.13260","ref_index":43,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Extracting Governing Equations from Latent Dynamics via Multi-View Contrastive Learning","primary_cat":"cs.LG","submitted_at":"2026-06-11T12:16:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DYSCO jointly recovers latent trajectories and governing equations from noisy observations via multi-view contrastive learning, with theoretical guarantees up to affine indeterminacy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.08497","ref_index":38,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Explaining Black-Box Language Models: Learning to Optimize Linguistically-Structured Word Subsets","primary_cat":"cs.AI","submitted_at":"2026-06-07T07:54:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Amortized optimization with policy gradients and graph knowledge selects informative word subsets to explain black-box DLM outputs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.08491","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"What Makes a Desired Graph for Relational Deep Learning?","primary_cat":"cs.AI","submitted_at":"2026-06-07T07:30:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Schema-derived graphs for relational deep learning suffer from information overload and semantic fragmentation; controlled filtering and injection via an end-to-end optimizer improves accuracy on 26 tasks while often lowering inference cost.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30429","ref_index":59,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Attention-based optimizer for symmetry finding","primary_cat":"quant-ph","submitted_at":"2026-05-28T18:00:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A Set-Transformer architecture with self-attention encodes Pauli-string correlations, optimizes via commutation objective, and finds symmetries with near-deterministic success on physical models like Ising and Toric code.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29075","ref_index":4,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Knowledge Offloading: Decomposing LLMs into Sparse Backbones and Memory Modules","primary_cat":"cs.LG","submitted_at":"2026-05-27T20:29:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"KOFF prunes LLMs to ~12% sparsity while adding LoRA and learned KV memories, preserving performance where plain pruning fails across 3B-8B Llama and Qwen models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14364","ref_index":17,"ref_count":3,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MoRe: Modular Representations for Principled Continual Representation Learning on Sequential Data","primary_cat":"cs.LG","submitted_at":"2026-05-14T04:46:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MoRe identifies modular structure in representations themselves to enable principled reuse, alignment, and expansion of modules during continual adaptation on sequential data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12809","ref_index":270,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Correcting Influence: Unboxing LLM Outputs with Orthogonal Latent Spaces","primary_cat":"cs.LG","submitted_at":"2026-05-12T23:01:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A latent mediation framework with sparse autoencoders enables non-additive token-level influence attribution in LLMs by learning orthogonal features and back-propagating attributions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06632","ref_index":26,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Crafting Reversible SFT Behaviors in Large Language Models","primary_cat":"cs.LG","submitted_at":"2026-05-07T17:44:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"LCDD creates sparse carriers for SFT behaviors that SFT-Eraser can reverse, with ablations showing the sparse structure enables causal control.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06441","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Light-FMP: Lightweight Feature and Model Pruning for Enhanced Deep Recommender Systems","primary_cat":"cs.IR","submitted_at":"2026-05-07T15:41:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Light-FMP prunes features and model parameters in deep recommender systems by pretraining a hard-concrete masking layer on data subsets, then retraining the reduced model to improve both efficiency and accuracy over prior methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.07746","ref_index":4,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Towards Rapid Constitutive Model Discovery from Multi-Modal Data: Physics Augmented Finite Element Model Updating (paFEMU)","primary_cat":"cs.LG","submitted_at":"2026-04-09T03:08:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"paFEMU enables rapid constitutive model discovery by integrating sparse regression, physics augmentation, and finite element adjoint optimization on multi-modal data for interpretable transfer learning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2603.15250","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"In-Context Symbolic Regression for Robustness-Improved Kolmogorov-Arnold Networks","primary_cat":"cs.LG","submitted_at":"2026-03-16T13:21:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"In-context symbolic regression methods improve robustness of symbolic formula recovery from KANs, cutting median OFAT test MSE by up to 99.8 percent across hyperparameter sweeps.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2505.17469","ref_index":43,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Efficient compression of neural networks and datasets","primary_cat":"cs.LG","submitted_at":"2025-05-23T04:50:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Refined probabilistic and smooth l0 pruning techniques approximate minimum description length for neural networks, achieving high compression with minimal accuracy loss and empirically verifying better sample efficiency and generalization on image and text tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"1907.02519","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Neuron ranking -- an informed way to condense convolutional neural networks architecture","primary_cat":"cs.LG","submitted_at":"2019-07-03T15:20:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Shapley value and variational importance switch methods produce consistent rankings of filter importance in CNNs, enabling compression and interpretability.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"1907.00664","ref_index":60,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Learning World Graphs to Accelerate Hierarchical Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2019-07-01T11:22:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A two-stage framework learns a world graph of pivotal states task-agnostically via joint training of a latent model and curiosity-driven policy, then uses the graph to accelerate hierarchical RL on maze tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"1906.10771","ref_index":25,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Importance Estimation for Neural Network Pruning","primary_cat":"cs.LG","submitted_at":"2019-06-25T22:20:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Taylor-expansion importance scoring enables layer-agnostic pruning of neural networks that outperforms prior methods on ImageNet accuracy-FLOPs trade-offs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}