{"total":81,"items":[{"citing_arxiv_id":"2607.01678","ref_index":39,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"SCAPE: Accurate and Efficient LLM Training with Extreme Sparse Communication","primary_cat":"cs.LG","submitted_at":"2026-07-02T04:10:42+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SCAPE enables 90-99% sparse gradient communication in sharded Adam-style LLM training by deriving masks from first-moment statistics, achieving up to 43.3% faster pre-training on Llama-500M with no loss in validation loss or downstream accuracy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.31764","ref_index":14,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"NURBS Splatting: A Unified Differentiable Rendering Framework for Vector Graphics","primary_cat":"cs.GR","submitted_at":"2026-06-30T14:51:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"NURBS Splatting represents rational splines as continuous Gaussian fields sampled along the curve to enable stable differentiable rendering of vector graphics.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.29031","ref_index":6,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"How to Leverage Synthetic Speech for LLM-Based ASR Systems?","primary_cat":"cs.CL","submitted_at":"2026-06-27T17:57:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Layer selection plus RIR augmentation on synthetic speech matches full real-data ASR performance using 25% real speech in SLAM-ASR.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.28739","ref_index":13,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Agent Safety Is Action Alignment","primary_cat":"cs.AI","submitted_at":"2026-06-27T05:26:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Agent safety cannot be achieved via model refusal training and instead requires external least-privilege enforcement evaluated as action alignment.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.27871","ref_index":31,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"LocalNav: Distilling Frontier VLMs and Embodied RL for On-Device Object Goal Navigation","primary_cat":"cs.RO","submitted_at":"2026-06-26T09:11:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Distillation from frontier VLMs plus E-RLVR regularization produces a 4B local model that achieves 34.5% SR on OVON while cutting inference latency by 82.8%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.26285","ref_index":3,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"TEMPO-Diffusion: Temporally Exposed Malicious Poisoning of Diffusion Models","primary_cat":"cs.CR","submitted_at":"2026-06-24T18:31:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"TEMPO-Diffusion is a targeted backdoor attack framework for diffusion models that uses time-conditioned triggers to poison class-specific synthetic data, achieving high attack success in downstream classifiers.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.25357","ref_index":3,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Compositional Behavioral Semantics for State Abstraction in Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-06-24T03:43:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A category-theoretic compositional framework for behavioral semantics in RL that supports safe transfer of structures under state abstraction and sound quantitative metrics.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.21282","ref_index":37,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Differential Zonotopes for Verifying Global Robustness of DNNs","primary_cat":"cs.CR","submitted_at":"2026-06-19T09:58:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Differential halo zonotopes enable static verification of global robustness in DNNs by jointly propagating pairs of perturbed inputs while bounding divergence, with a relaxed confidence-based variant.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.19560","ref_index":29,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Understanding Key Features of Time Series Foundation Models from Epidemic Forecasting","primary_cat":"cs.LG","submitted_at":"2026-06-17T20:01:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"UNKNOWN","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Mixture-of-experts fusing multiple pretrained forecasters achieves strongest performance on influenza time series, with pretraining gains largest at longer horizons when domain-aligned and LLM methods underperforming.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.17978","ref_index":10,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"MoCo-AIS: A Contrastive Learning Framework for Similarity Computation of Vessel Trajectories","primary_cat":"cs.AI","submitted_at":"2026-06-16T14:30:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MoCo-AIS is a MoCo-based contrastive learning framework that learns vessel trajectory embeddings and improves similarity computation over baselines on large-scale real-world AIS datasets while offering a benchmarking platform.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.13024","ref_index":14,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"CausalMoE: A Billion-Scale Multimodal Foundation Model for Granger Causal Discovery with Pattern-Routed Heterogeneous Experts","primary_cat":"cs.LG","submitted_at":"2026-06-11T07:57:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CausalMoE is a multimodal foundation model with pattern-routed heterogeneous experts and LLM/VLM integration that claims new SOTA performance on supervised and few-shot Granger causal discovery benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.12494","ref_index":60,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Net-Ev$^2$: A Generative Simulator for Network Event Evolution","primary_cat":"cs.LG","submitted_at":"2026-06-10T12:41:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Net-Ev² proposes a two-stage generative simulator with structure-guided masked pre-training and topology-aware diffusion using graph U-Net down/upsampling to model network event evolution from text inputs, plus a new 6.5M multimodal benchmark and JL-MMD metric.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11841","ref_index":37,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Scene-Adaptive Nonlinear Tone Curves for Pseudo Ground-Truth Generation in Low-Light 3D Gaussian Splatting","primary_cat":"cs.CV","submitted_at":"2026-06-10T09:20:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Scene-adaptive nonlinear tone curves (ASE and AP3) with percentile normalisation and offset outperform linear gain for pseudo-GT generation in low-light 3DGS, delivering PSNR gains up to 4.34 dB on LOM and 3.25 dB on RealX3D across 21 scenes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.10053","ref_index":19,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Stability in Competitive Search with Results Diversification","primary_cat":"cs.GT","submitted_at":"2026-06-08T18:27:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Game-theoretic analysis of diversification in competitive search reveals a diversity-stability tradeoff, with a new method to guarantee corpus equilibrium.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.09248","ref_index":69,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Temporal-Aware Reasoning Optimization for Video Temporal Grounding","primary_cat":"cs.CV","submitted_at":"2026-06-08T09:21:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TaRO improves video temporal grounding in MLLMs via constructive reasoning exploration from dense captions and a temporal-sensitivity reward that uses logit drops on disrupted event boundaries, followed by curriculum learning to SOTA results.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.08049","ref_index":56,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"SKILL.nb: Selective Formalization and Gated Execution for Durable Agent Workflows","primary_cat":"cs.AI","submitted_at":"2026-06-06T08:27:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SKILL.nb uses selective formalization and gate-conditioned execution in auditable notebooks to improve durability of agent workflows, achieving 53.7% success on WebArena-Verified with 91.7% retention across re-executions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07193","ref_index":38,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Shield-Loco: Shielding Locomotion Policies with Predictive Safety Filtering","primary_cat":"cs.RO","submitted_at":"2026-06-05T11:59:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A post-hoc predictive safety filter adjusts RL policy contact locations for quadruped robots via sampling-based optimization on a full-physics model, reducing safety violations in cluttered environments with minimal performance deviation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03926","ref_index":26,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"DiffUNet^2: Bidirectional Prediction, Probabilistic Generation and Collaborative Visual Discovery for Scientific Data","primary_cat":"cs.HC","submitted_at":"2026-06-02T17:15:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DiffUNet^2 is a bidirectional conditional diffusion model integrated with visual tools for probabilistic exploration of scientific time series across five evaluated datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03430","ref_index":20,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"FlowGuard: Flow Matching for Identity-Independent Detection of Data-Free Model Stealing Attacks on Energy System Intrusion Detection Systems","primary_cat":"cs.CR","submitted_at":"2026-06-02T10:18:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"FlowGuard applies continuous normalizing flows to flag out-of-distribution synthetic queries from model stealing attacks on IDS, achieving stable detection in single-client and 100-client Sybil settings unlike identity-dependent baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02282","ref_index":27,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"POIROT: Interrogating Agents for Failure Detection in Multi-Agent Systems","primary_cat":"cs.AI","submitted_at":"2026-06-01T14:05:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"POIROT protocol repurposes agents in LLM multi-agent systems as an internal diagnostic layer for failure detection, outperforming single-LLM evaluators with gains that increase with complexity, agent count, and fault types.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00718","ref_index":11,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"LLM-Driven Co-Evolutionary Automated Heuristic Design for Bi-Component Coupled Combinatorial Optimization","primary_cat":"cs.AI","submitted_at":"2026-05-30T13:04:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CoEvo-AHD is an LLM-driven dual-population co-evolutionary method for automated heuristic design in bi-component coupled combinatorial optimization that achieves competitive results on TTP and TPP.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.31388","ref_index":12,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Constrained Multi-Objective Reinforcement Learning with Max-Min Criterion","primary_cat":"cs.LG","submitted_at":"2026-05-29T14:52:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Introduces a constrained max-min MORL algorithm with convergence analysis, validated in tabular settings and three simulated control domains.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30581","ref_index":92,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Prior Availability in Industrial Visual Sim-to-Real: A Review of CAD-Guided and CAD-Unavailable Regimes","primary_cat":"cs.CV","submitted_at":"2026-05-28T21:18:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A review paper that organizes industrial visual sim-to-real literature into CAD-available, CAD-unavailable, and boundary-prior regimes based on the type of prior information available.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02607","ref_index":5,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Geometry-Aware Tabular Diffusion","primary_cat":"cs.LG","submitted_at":"2026-05-23T17:59:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GATD adds explicit geometric relational supervision to tabular diffusion, achieving SOTA benchmark wins with substantially fewer parameters across ten datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02602","ref_index":40,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Graph Mamba Survival Analysis Based on Topology-Aware ordering","primary_cat":"cs.LG","submitted_at":"2026-05-23T09:23:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"TopoMamSurv introduces topology-aware ordering and bidirectional Mamba with GCN for efficient WSI graph survival analysis, claiming performance gains on five TCGA datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22981","ref_index":13,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Memorization Dynamics of Fill-in-the-Middle Pretraining","primary_cat":"cs.CL","submitted_at":"2026-05-21T19:23:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"FIM pretraining yields linear growth of verbatim extraction with data repetitions and stronger prefix dependence for recall than left-to-right training in matched Llama models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21071","ref_index":29,"ref_count":3,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Fine-grained Claim-level RAG Benchmark for Law","primary_cat":"cs.CL","submitted_at":"2026-05-20T11:56:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ClaimRAG-LAW is a French-English legal RAG benchmark with claim-level granularity for experts and non-experts that reveals limitations in current retrieval and generation performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17894","ref_index":6,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Evaluating Cognitive Age Alignment in Interactive AI Agents","primary_cat":"cs.AI","submitted_at":"2026-05-18T05:56:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"The paper presents ChildAgentEval as the first psychometrically grounded benchmark comparing MLLM-based agents' reasoning performance to age-specific human cognitive stages.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16919","ref_index":25,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"CAST: Causal Anchored Simplex Transport for Distribution-Valued Time Series","primary_cat":"stat.ML","submitted_at":"2026-05-16T10:23:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CAST is a successor-local operator for causal forecasting of simplex-valued time series that retrieves empirical successors from causal context, stabilizes them with a persistence anchor, and applies bounded local stochastic transport while preserving the simplex by construction.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16486","ref_index":18,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"StAD: Stein Amortized Divergence for Fast Likelihoods with Diffusion and Flow","primary_cat":"stat.ML","submitted_at":"2026-05-15T18:00:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"StAD distills divergence of PF-ODEs via the Langevin-Stein operator for faster, lower-variance likelihood estimation in generative models without Jacobian costs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16468","ref_index":13,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Mechanistically Interpretable Neural Encoding Reveals Fine-Grained Functional Selectivity in Human Visual Cortex","primary_cat":"cs.CV","submitted_at":"2026-05-15T11:28:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MINE uses mechanistic interpretability on language-aligned image representations to generate per-voxel feature descriptions, validated via image generation and counterfactual edits that causally shift brain activation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15809","ref_index":32,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Diversified Residual Symbolic Regression","primary_cat":"cs.NE","submitted_at":"2026-05-15T10:04:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DRSR uses Quality-Diversity to produce diverse symbolic regression expressions differing in residual distributions, enabling post-search selection on synthetic and astronomical data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15058","ref_index":166,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"NeuroTrain: Surveying Local Learning Rules for Spiking Neural Networks with an Open Benchmarking Framework","primary_cat":"cs.NE","submitted_at":"2026-05-14T16:50:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A taxonomy of SNN training algorithms is presented with the release of NeuroTrain, an open benchmarking framework for reproducible comparisons across datasets and architectures.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"tization Clip-Floor-Shift (QCFS) activation function [163], which replaces ReLU during ANN training so that the source network already produces outputs matching the quantization resolution ofTspiking timesteps; this was the first method to achieve competitive conversion atT=4. Subsequent work pushed latency further through optimized membrane poten- tial initialization [166], iterative retraining down to a single timestep [167], and conversion from quantized ANNs that for- mally establishes the equivalence betweenT-timestep spiking inference and∼log 2(T)-bit weight quantization [168]. On the calibrationaxis, post-training correction methods adjust thresh- olds and other parameters using only a small calibration set (often∼128 images), avoiding full retraining."},{"citing_arxiv_id":"2605.14897","ref_index":18,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Critic-Driven Voronoi-Quantization for Distilling Deep RL Policies to Explainable Models","primary_cat":"cs.LG","submitted_at":"2026-05-14T14:38:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Critic-Driven Voronoi State Partitioning distills deep RL policies into piecewise-linear models by iteratively adding linear subpolicies in high-value-error regions identified by the critic.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14851","ref_index":22,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"IFPV: An Integrated Multi-Agent Framework for Generative Operational Planning and High-Fidelity Plan Verification","primary_cat":"cs.MA","submitted_at":"2026-05-14T13:58:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"IFPV integrates multi-perspective hierarchical agents for generative planning with an adversarial cognitive simulation engine for verification, reporting 19.4% higher mission success, 41.7% lower cost versus LLM baseline, and 31.8% higher suppression versus rule-based validation in combat simulation","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14465","ref_index":36,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"From Table to Cell: Attention for Better Reasoning with TABALIGN","primary_cat":"cs.AI","submitted_at":"2026-05-14T07:00:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"TABALIGN pairs a diffusion language model planner emitting binary cell masks with a trained attention verifier, raising average accuracy 15.76 points over strong baselines on eight table benchmarks while speeding execution 44.64%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12905","ref_index":22,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Same Image, Different Meanings: Toward Retrieval of Context-Dependent Meanings","primary_cat":"cs.IR","submitted_at":"2026-05-13T02:31:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Image meanings grow more context-dependent with semantic abstraction, requiring narrative grounding for accurate retrieval at higher levels.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Vision-Language Model's Perception of the World at Any Resolution.arXiv preprint arXiv:2409.12191(2024). [21] Cong Wei, Yang Chen, Haonan Chen, Hexiang Hu, Ge Zhang, Jie Fu, Alan Ritter, and Wenhu Chen. 2024. UniIR: Training and Benchmarking Universal Multimodal Information Retrievers. InProceedings of the European Conference on Computer Vision (ECCV). [22] Jheng-Hong Yang, Carlos Lassance, Rafael Sampaio de Rezende, Krishna Srini- vasan, Miriam Redi, Stéphane Clinchant, and Jimmy Lin. 2023. AToMiC: An Image/Text Retrieval Test Collection to Support Multimedia Content Creation. InProceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval. 2975-2984."},{"citing_arxiv_id":"2605.12693","ref_index":13,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"IGT-OMD: Implicit Gradient Transport for Decision-Focused Learning under Delayed Feedback","primary_cat":"cs.LG","submitted_at":"2026-05-12T19:43:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"IGT-OMD reduces gradient transport error from quadratic to linear in delay length for delayed bilevel optimization and achieves sublinear regret with adaptive steps.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12678","ref_index":6,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"No One Knows the State of the Art in Geospatial Foundation Models","primary_cat":"cs.CV","submitted_at":"2026-05-12T19:29:51+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"An audit of 152 papers reveals that geospatial foundation models lack standardized evaluations, training controls, and weight releases, so no one knows the state of the art.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12106","ref_index":22,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Large Language Models as Amortized Pareto-Front Generators for Constrained Bi-Objective Convex Optimization","primary_cat":"cs.AI","submitted_at":"2026-05-12T13:20:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DIPS fine-tunes LLMs to output ordered feasible decision vectors approximating Pareto fronts for constrained bi-objective convex problems, reaching 95-98% normalized hypervolume with 0.16s inference.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[20] Zangir Iklassov, Yali Du, Farkhad Akimov, and Martin Takáˇc. Self-guiding exploration for combinatorial problems. InAdvances in Neural Information Processing Systems, 2024. [21] Xia Jiang, Yaoxin Wu, Minshuo Li, Zhiguang Cao, and Yingqian Zhang. Large language models as end-to-end combinatorial optimization solvers.arXiv preprint arXiv:2509.16865, 2025. [22] Shengkai Jin, Tianyu Chen, Chonghan Gao, and Jun Han. GeoNum: Bridging numerical continuity and language semantics via geometric embedding. InProceedings of the AAAI Conference on Artificial Intelligence, volume 40, pages 22426-22434, 2026. doi: 10.1609/aaai. v40i27.39401. [23] Yuu Jinnai, Ukyo Honda, Tetsuro Morimura, and Peinan Zhang. Generating diverse and high-"},{"citing_arxiv_id":"2605.10898","ref_index":9,"ref_count":2,"confidence":0.5,"is_internal_anchor":false,"paper_title":"How Creatives Approach GenAI Image Generation: Tensions Between Structured Guidance, Self-Experimentation, and Creative Autonomy","primary_cat":"cs.HC","submitted_at":"2026-05-11T17:40:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Qualitative studies show creatives prefer self-experimentation over structured guidance for GenAI image tools to preserve creative autonomy despite terminology barriers.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"features improve usability and reduce friction, they primarily sup- port outcome control rather than conceptual understanding. Users may gain more ways to manipulate results, but still lack transfer- able intuitions about how systems interpret inputs, why outputs vary, or when failures occur. As models change over time, users must also continually adapt strategies that may not transfer across versions or tools [9, 27, 74]. We adopt a mental models lens [67] to examine how creatives understand and reason about GenAI image tools. Throughout this paper, we use the term \"creatives\" to refer to visual artists and hobbyists who engage with GenAI image tools in creative con- texts. We first conducted interviews with 8 visual artists and hob- byists to understand how they approach GenAI image tools and"},{"citing_arxiv_id":"2605.10810","ref_index":6,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Likelihood scoring for continuations of mathematical text: a self-supervised benchmark with tests for shortcut vulnerabilities","primary_cat":"cs.LG","submitted_at":"2026-05-11T16:32:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Presents a likelihood-based benchmark for equation-suffix prediction in technical papers with controls to detect shortcut vulnerabilities in model forecasts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18793","ref_index":32,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Dimensional Balance Improves Large Scale Spatiotemporal Prediction Performance","primary_cat":"cs.LG","submitted_at":"2026-05-11T06:29:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A scalable framework harmonizes spatial and temporal representations via low-rank spatial compression and extended temporal horizons to reduce prediction uncertainty in large-scale spatiotemporal tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09079","ref_index":22,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"CauSim: Scaling Causal Reasoning with Increasingly Complex Causal Simulators","primary_cat":"cs.AI","submitted_at":"2026-05-09T17:39:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CauSim turns scarce causal reasoning labels into scalable supervised data by having LLMs incrementally construct complex executable structural causal models.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"and non-executable representations, ▶ consistent gains by curriculum scaling and data volume, ▶ LLM self-improvement, and▶data augmentation (Sec. 5). 2 Related work Evaluating Causal Reasoning.There is an abundant literature on theevaluationof LLM causal reasoning, including on textual problems derived from SCMs [ 6, 19, 8], code and mathematical domains [20, 21], and abstract out-of-distribution regimes [22]. Complementary studies consider 2Structural causal models, introduced by Judea Pearl, are a widely used formalism for representing causal systems in the causal inference literature. For details see Sec. 3.1 and [14, 15, 12], among others. 2 Table 1: Comparison of CauSimwith related work across key properties. ✓ indicates supported; ✗indicates not supported."},{"citing_arxiv_id":"2605.08452","ref_index":8,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"NICE FACT: Diagnosing and Calibrating VLMs in Quantitative Reasoning for Kinematic Physics","primary_cat":"cs.CV","submitted_at":"2026-05-08T20:17:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"VLMs fail to identify visual preconditions or apply physical laws in kinematic physics tasks, as shown by new FACT diagnostics and NICE calibration methods evaluated on six state-of-the-art models.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"All the models are directly from open-sourced Huggingface base: https://huggingface.co/. MetricsThe equation for MRA and KL-divergence is pretty straightforward, and can be easily found in many works [Lan et al., 2026a, Puyin et al., 2025, Yang et al., 2025b]. We show them here again: KL-Divergence (KL) [Kullback and Leibler, 1951]: DKL(H(s)∥P M(s)) = mX i=1 H(s) log H(s) PM(s) .(8) 15 H(s) is human distribution and PM(s) is model distribution. the KL score measures how much H(s) deviates fromP M(s). To evaluate the model performance more comprehensively, the Mean Relative Accuracy (MRA) is as follows: a discrete set of confidence levels C={0.1,0.2, . . . ,0.9,0.95} is defined. For a given predictionˆyand its corresponding ground truthy, the MRA is formulated as:"},{"citing_arxiv_id":"2605.07276","ref_index":54,"ref_count":1,"confidence":0.5,"is_internal_anchor":false,"paper_title":"Signal Reshaping for GRPO in Weak-Feedback Agentic Code Repair","primary_cat":"cs.AI","submitted_at":"2026-05-08T05:41:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Reshaping outcome rewards, process signals, and rollout comparability in GRPO raises strict compile-and-semantic accuracy in agentic code repair from 0.385 to 0.535 under weak feedback.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05953","ref_index":10,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Hallucination as an Anomaly: Dynamic Intervention via Probabilistic Circuits","primary_cat":"cs.CL","submitted_at":"2026-05-07T10:02:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Probabilistic circuits detect LLM hallucinations as residual-stream anomalies with up to 99% AUROC and enable dynamic correction that raises truthfulness scores while cutting unnecessary output corruption.","context_count":1,"top_context_role":"other","top_context_polarity":"unclear","context_text":"ISSN 2374-3468. doi: 10.1609/aaai. v38i15.29675. URLhttps://ojs.aaai.org/index.php/AAAI/article/view/29675. [9] Daniel Xie, Maxwell J. Jacobson, Adil Wazeer, Haiyan Wang, Xinghang Zhang, and Yexiang Xue. Reducing Hallucinations in LLM-based Scientific Literature Analysis Using Peer Context Outlier Detection, 2026. URLhttps://arxiv.org/abs/2604.01461. [10] Albert Q. Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, Lélio Renard Lavaud, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, and William El Sayed. Mistral 7B, 2023. URL https://arxiv."},{"citing_arxiv_id":"2605.05908","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Architecture-agnostic Lipschitz-constant Bayesian header and its application to resolve semantically proximal classification errors with vision transformers","primary_cat":"cs.CV","submitted_at":"2026-05-07T09:18:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LipB-ViT adds bi-Lipschitz Bayesian layers to vision transformers and uses uncertainty-aware fusion to identify corrupted labels with over 93% recall at 15% noise, beating kNN baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05534","ref_index":16,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Adversarial Graph Neural Network Benchmarks: Towards Practical and Fair Evaluation","primary_cat":"cs.LG","submitted_at":"2026-05-07T00:27:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A large-scale standardized benchmark of GNN attacks and defenses reveals that target node selection and attacked-model training process can completely distort measured attack effectiveness.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Natarajan, editors,Thirty-Eighth AAAI Conference on Artificial Intelligence, AAAI 2024, Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence, IAAI 2024, Fourteenth Symposium on Educational Advances in Artificial Intelligence, EAAI 2014, February 20-27, 2024, Vancouver, Canada, pages 21063-21071. AAAI Press, 2024. doi: 10.1609/AAAI. V38I19.30098. URLhttps://doi.org/10.1609/aaai.v38i19.30098. [16] S. Ennadir, J. F. Lutzeyer, M. Vazirgiannis, and E. H. Bergou. If you want to be robust, be wary of initialization. In A. Globersons, L. Mackey, D. Belgrave, A. Fan, U. Paquet, J. M. Tomczak, and C. Zhang, editors,Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada,"},{"citing_arxiv_id":"2605.05251","ref_index":6,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Identifier-Free Code Embedding Models for Scalable Search","primary_cat":"cs.CR","submitted_at":"2026-05-05T17:53:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A fine-tuned Qwen3-Embedding model with contrastive learning outperforms baselines on bidirectional source-to-decompiled code association and generalizes to constant-algorithm tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}