{"total":13,"items":[{"citing_arxiv_id":"2605.12090","ref_index":164,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"World Action Models: The Next Frontier in Embodied AI","primary_cat":"cs.RO","submitted_at":"2026-05-12T13:10:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"The paper introduces World Action Models as a new paradigm unifying predictive world modeling with action generation in embodied foundation models and provides a taxonomy of existing approaches.","context_count":1,"top_context_role":"dataset","top_context_polarity":"background","context_text":"UMI on Legs [ 147], HoMMI [ 148], MV-UMI [149] Simulation Data MimicGen [150], ManiSkill2 [ 151], RoboCasa [152], Robo T win [153], DexMimicGen [ 154] TesserAct [66], RoboCerebra [155], SynGrasp-1B [156], Robo T win 2.0 [157], TLA Dataset [ 158] InternData-M1 [159], InternData-A1 [160], QUARD-Auto [161] Human Data SSv2 [162], EPIC-KITCHENS [163], HowT o100M [164], Kinetics-700 [165], EGTEA Gaze+ [ 166] Ego4D [167], HOI4D [168], EgoVid-5M [169], COM Kitchens [ 170], Egocentric-10k [ 171], DreamDojo [ 35] Assembly101 [172], H2O [ 173], EgoP AT3D [174], Ego-Exo4D [175], ARCTIC [176], HoloAssist [177] HOT3D [178], TACO [179], Kaiwu [ 180], OAKINK2 [181], Nymeria [ 182], EgoMimic [183] PH2D [184], Humanoid Everyday [185], IndEgo [ 186], PLAICraft [187], HD-EPIC [ 188], UniHand [189]"},{"citing_arxiv_id":"2605.11459","ref_index":40,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Overcoming Dynamics-Blindness: Training-Free Pace-and-Path Correction for VLA Models","primary_cat":"cs.RO","submitted_at":"2026-05-12T03:17:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Pace-and-Path Correction is a closed-form inference-time operator that decomposes a quadratic cost minimization into orthogonal pace compression and path offset channels to correct dynamics-blindness in chunked-action VLA models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05126","ref_index":21,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"ConsisVLA-4D: Advancing Spatiotemporal Consistency in Efficient 3D-Perception and 4D-Reasoning for Robotic Manipulation","primary_cat":"cs.RO","submitted_at":"2026-05-06T16:55:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ConsisVLA-4D adds cross-view semantic alignment, cross-object geometric fusion, and cross-scene dynamic reasoning to VLA models, delivering 21.6% and 41.5% gains plus 2.3x and 2.4x speedups on LIBERO and real-world tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00159","ref_index":33,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"E$^2$DT: Efficient and Effective Decision Transformer with Experience-Aware Sampling for Robotic Manipulation","primary_cat":"cs.RO","submitted_at":"2026-04-30T19:28:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"E²DT couples a Decision Transformer with a k-Determinantal Point Process that scores trajectories on return-to-go quantiles, predictive uncertainty, and stage coverage to improve sample efficiency and policy quality in robotic manipulation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.22152","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"dWorldEval: Scalable Robotic Policy Evaluation via Discrete Diffusion World Model","primary_cat":"cs.RO","submitted_at":"2026-04-24T01:50:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A discrete diffusion model tokenizes multimodal robotic data and uses a progress token to predict future states and task completion for scalable policy evaluation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15281","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"R3D: Revisiting 3D Policy Learning","primary_cat":"cs.CV","submitted_at":"2026-04-16T17:50:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A transformer 3D encoder plus diffusion decoder architecture, with 3D-specific augmentations, outperforms prior 3D policy methods on manipulation benchmarks by improving training stability.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.13800","ref_index":26,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"EmbodiedClaw: Conversational Workflow Execution for Embodied AI Development","primary_cat":"cs.RO","submitted_at":"2026-04-15T12:36:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"EmbodiedClaw automates embodied AI development workflows through conversation, reducing manual effort and improving consistency and reproducibility.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.11674","ref_index":2,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"AffordSim: A Scalable Data Generator and Benchmark for Affordance-Aware Robotic Manipulation","primary_cat":"cs.RO","submitted_at":"2026-04-13T16:21:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AffordSim integrates open-vocabulary 3D affordance prediction into simulation trajectory generation to create a 50-task benchmark that reaches 93% of manual annotation success rates and enables 24% average zero-shot success on a real Franka FR3.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09860","ref_index":7,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"RoboLab: A High-Fidelity Simulation Benchmark for Analysis of Task Generalist Policies","primary_cat":"cs.RO","submitted_at":"2026-04-10T19:42:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RoboLab is a photorealistic simulation benchmark with 120 tasks and perturbation analysis to evaluate true generalization and robustness of robotic foundation models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08544","ref_index":22,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SIM1: Physics-Aligned Simulator as Zero-Shot Data Scaler in Deformable Worlds","primary_cat":"cs.RO","submitted_at":"2026-04-09T17:59:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SIM1 converts sparse real demonstrations into high-fidelity synthetic data through physics-aligned simulation, yielding policies that match real-data performance at a 1:15 ratio with 90% zero-shot success on deformable manipulation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.04974","ref_index":38,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"From Video to Control: A Survey of Learning Manipulation Interfaces from Temporal Visual Data","primary_cat":"cs.RO","submitted_at":"2026-04-04T15:37:11+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A survey introduces an interface-centric taxonomy for video-to-control methods in robotic manipulation and identifies the robotics integration layer as the central open challenge.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.02523","ref_index":10,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"RoboCasa: Large-Scale Simulation of Everyday Tasks for Generalist Robots","primary_cat":"cs.RO","submitted_at":"2024-06-04T17:41:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RoboCasa supplies a large-scale kitchen simulator, generative assets, 100 tasks, and automated data pipelines that produce a clear scaling trend in imitation learning for generalist robots.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2306.03310","ref_index":24,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"LIBERO: Benchmarking Knowledge Transfer for Lifelong Robot Learning","primary_cat":"cs.AI","submitted_at":"2023-06-05T23:32:26+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"LIBERO is a new benchmark for lifelong robot learning that evaluates transfer of declarative, procedural, and mixed knowledge across 130 manipulation tasks with provided demonstration data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}