{"total":27,"items":[{"citing_arxiv_id":"2605.12288","ref_index":61,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"TokenRatio: Principled Token-Level Preference Optimization via Ratio Matching","primary_cat":"cs.CL","submitted_at":"2026-05-12T15:44:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TBPO derives a token-level preference optimization objective from sequence-level pairwise data via Bregman divergence ratio matching that generalizes DPO and improves alignment quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12090","ref_index":260,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"World Action Models: The Next Frontier in Embodied AI","primary_cat":"cs.RO","submitted_at":"2026-05-12T13:10:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"The paper introduces World Action Models as a new paradigm unifying predictive world modeling with action generation in embodied foundation models and provides a taxonomy of existing approaches.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09989","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"StereoPolicy: Improving Robotic Manipulation Policies via Stereo Perception","primary_cat":"cs.RO","submitted_at":"2026-05-11T05:06:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"StereoPolicy fuses stereo image pairs via a Stereo Transformer on pretrained 2D encoders to boost robotic manipulation policies, showing gains over monocular, RGB-D, point cloud, and multi-view methods in simulations and real-robot tests.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09824","ref_index":47,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Geometric Pareto Control: Riemannian Gradient Flow of Energy Function via Lie Group Homotopy","primary_cat":"eess.SY","submitted_at":"2026-05-11T00:01:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Geometric Pareto Control embeds Pareto solutions in a Lie group submanifold and navigates via Riemannian gradient flow to achieve 100% feasibility and low suboptimality in control tasks without retraining.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11033","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"TokaMind for Power Grid: Cross-Domain Transfer from Fusion Plasma","primary_cat":"physics.plasm-ph","submitted_at":"2026-05-10T23:38:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"TokaMind, pre-trained on MAST tokamak data, transfers to power grid PMU data for severe event classification with F1 0.837, where difficulty depends on grid topology and CSD indicators boost early-warning performance over CNN baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09009","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Large Language Models for Sequential Decision-Making: Improving In-Context Learning via Supervised Fine-Tuning","primary_cat":"cs.LG","submitted_at":"2026-05-09T15:49:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Supervised fine-tuning of pretrained LLMs on offline trajectories yields better few-shot sequential decision-making than in-context-only baselines, with a theoretical suboptimality bound derived for linear MDPs by interpreting attention as Q-function estimation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07379","ref_index":19,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"RELO: Reinforcement Learning to Localize for Visual Object Tracking","primary_cat":"cs.CV","submitted_at":"2026-05-08T07:34:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RELO replaces handcrafted spatial priors with a reinforcement learning policy for target localization in visual tracking and reports 57.5% AUC on LaSOText without template updates.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06609","ref_index":40,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Transformers Efficiently Perform In-Context Logistic Regression via Normalized Gradient Descent","primary_cat":"cs.LG","submitted_at":"2026-05-07T17:27:55+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Multi-layer transformers can implement in-context logistic regression by performing normalized gradient descent steps layer by layer, obtained via supervised training of a single attention layer followed by recurrent application with convergence and OOD guarantees.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04777","ref_index":77,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Bridging Perception and Action: A Lightweight Multimodal Meta-Planner Framework for Robust Earth Observation Agents","primary_cat":"cs.MA","submitted_at":"2026-05-06T11:30:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"The LMMP framework improves tool-calling accuracy and task success rates for Earth observation agents by grounding plans in multimodal features and remote sensing expert knowledge via a two-stage training process.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02300","ref_index":261,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"A Meta Reinforcement Learning Approach to Goals-Based Wealth Management","primary_cat":"cs.LG","submitted_at":"2026-05-04T07:48:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MetaRL pre-trained on GBWM problems delivers near-optimal dynamic strategies in 0.01s achieving 97.8% of DP optimal utility and handles larger problems where DP fails.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02026","ref_index":34,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Towards Systematic Generalization for Power Grid Optimization Problems","primary_cat":"cs.LG","submitted_at":"2026-05-03T19:23:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A shared graph neural network framework jointly solves ACOPF and SCUC problems using physics constraints and shows improved generalization to unseen grid topologies.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00347","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Odysseus: Scaling VLMs to 100+ Turn Decision-Making in Games via Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-05-01T02:05:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Odysseus adapts PPO with a turn-level critic and leverages pretrained VLM action priors to train agents achieving at least 3x average game progress over frontier models in long-horizon Super Mario Land.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.24182","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"$M^2$-VLA: Boosting Vision-Language Models for Generalizable Manipulation via Layer Mixture and Meta-Skills","primary_cat":"cs.RO","submitted_at":"2026-04-27T08:44:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"M²-VLA shows that generalized VLMs can serve as direct backbones for robotic manipulation by selectively extracting task-critical features via Mixture of Layers and adding Meta Skill Modules for efficient trajectory learning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.13472","ref_index":59,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Bridging MARL to SARL: An Order-Independent Multi-Agent Transformer via Latent Consensus","primary_cat":"cs.LG","submitted_at":"2026-04-15T04:52:22+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CMAT uses a transformer decoder to produce a high-level consensus vector in latent space, enabling simultaneous order-independent actions by all agents and optimization via single-agent PPO, with superior results on StarCraft II, Multi-Agent MuJoCo, and Google Research Football.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09824","ref_index":30,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"ProGAL-VLA: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models","primary_cat":"cs.RO","submitted_at":"2026-04-10T18:56:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"UNKNOWN","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ProGAL-VLA uses 3D graphs, symbolic sub-goals, and a Grounding Alignment Contrastive loss to ground actions on verified embeddings, raising robustness from 30.3% to 71.5% and ambiguity AUROC to 0.81 on robotic benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08398","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"ADAPTive Input Training for Many-to-One Pre-Training on Time-Series Classification","primary_cat":"cs.LG","submitted_at":"2026-04-09T15:58:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ADAPT is a new pre-training paradigm that aligns physical properties of time-series data to allow simultaneous training on 162 diverse classification datasets, achieving new state-of-the-art performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.07034","ref_index":3,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"KITE: Keyframe-Indexed Tokenized Evidence for VLM-Based Robot Failure Analysis","primary_cat":"cs.RO","submitted_at":"2026-04-08T12:49:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"KITE is a training-free method that uses keyframe-indexed tokenized evidence including BEV schematics to enhance VLM performance on robot failure detection, identification, localization, explanation, and correction.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.06425","ref_index":25,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Neural Computers","primary_cat":"cs.LG","submitted_at":"2026-04-07T20:01:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Neural Computers are introduced as a new machine form where computation, memory, and I/O are unified in a learned runtime state, with initial video-model experiments showing acquisition of basic interface primitives from traces.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21938","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"The Biggest Risk of Embodied AI is Governance Lag","primary_cat":"cs.CY","submitted_at":"2026-04-07T03:56:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Governance lag in observing, regulating, and distributing embodied AI is presented as the primary risk, appearing in observational, institutional, and distributive forms.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2509.02544","ref_index":54,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"UI-TARS-2 Technical Report: Advancing GUI Agent with Multi-Turn Reinforcement Learning","primary_cat":"cs.AI","submitted_at":"2025-09-02T17:44:45+00:00","verdict":"CONDITIONAL","verdict_confidence":"UNKNOWN","novelty_score":5.0,"formal_verification":"none","one_line_summary":"UI-TARS-2 reaches 88.2 on Online-Mind2Web, 47.5 on OSWorld, 50.6 on WindowsAgentArena, and 73.3 on AndroidWorld while attaining 59.8 mean normalized score on a 15-game suite through multi-turn RL and scalable data generation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2410.06158","ref_index":39,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"GR-2: A Generative Video-Language-Action Model with Web-Scale Knowledge for Robot Manipulation","primary_cat":"cs.RO","submitted_at":"2024-10-08T16:00:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GR-2 pre-trains on web-scale videos then fine-tunes on robot data to reach 97.7% average success across over 100 manipulation tasks with strong generalization to new scenes and objects.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2410.02713","ref_index":102,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"LLaVA-Video: Video Instruction Tuning With Synthetic Data","primary_cat":"cs.CV","submitted_at":"2024-10-03T17:36:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLaVA-Video-178K is a new synthetic video instruction dataset that, when combined with existing data to train LLaVA-Video, produces strong results on video understanding benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.13352","ref_index":49,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"AgentDojo: A Dynamic Environment to Evaluate Prompt Injection Attacks and Defenses for LLM Agents","primary_cat":"cs.CR","submitted_at":"2024-06-19T08:55:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"AgentDojo introduces an extensible evaluation framework populated with realistic agent tasks and security test cases to measure prompt injection robustness in tool-using LLM agents.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2312.13139","ref_index":80,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Unleashing Large-Scale Video Generative Pre-training for Visual Robot Manipulation","primary_cat":"cs.RO","submitted_at":"2023-12-20T16:00:43+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A GPT-style model pre-trained on large video datasets achieves 94.9% success on CALVIN multi-task manipulation and 85.4% zero-shot generalization, outperforming prior baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2310.16828","ref_index":149,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"TD-MPC2: Scalable, Robust World Models for Continuous Control","primary_cat":"cs.LG","submitted_at":"2023-10-25T17:57:07+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TD-MPC2 scales an implicit world-model RL method to a 317M-parameter agent that masters 80 tasks across four domains with a single hyperparameter configuration.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2303.03378","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"PaLM-E: An Embodied Multimodal Language Model","primary_cat":"cs.LG","submitted_at":"2023-03-06T18:58:06+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PaLM-E is a single 562B-parameter multimodal model that performs embodied reasoning tasks like robotic manipulation planning and visual question answering by interleaving vision, state, and text inputs with positive transfer from joint training on language and robotics data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2301.04104","ref_index":52,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Mastering Diverse Domains through World Models","primary_cat":"cs.AI","submitted_at":"2023-01-10T18:12:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DreamerV3 uses world models and robustness techniques to solve over 150 tasks across domains with a single configuration, including Minecraft diamond collection from scratch.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}