{"work":{"id":"c356b2a8-eaa6-476d-b6d8-8680bb5ddff8","openalex_id":null,"doi":null,"arxiv_id":"2510.12796","raw_key":null,"title":"DriveVLA-W0: World Models Amplify Data Scaling Law in Autonomous Driving","authors":null,"authors_text":"Yingyan Li, Shuyao Shang, Weisong Liu, Bing Zhan, Haochen Wang, Yuqi Wang","year":2025,"venue":"cs.CV","abstract":"Scaling Vision-Language-Action (VLA) models on large-scale data offers a promising path to achieving a more generalized driving intelligence. However, VLA models are limited by a ``supervision deficit'': the vast model capacity is supervised by sparse, low-dimensional actions, leaving much of their representational power underutilized. To remedy this, we propose \\textbf{DriveVLA-W0}, a training paradigm that employs world modeling to predict future images. This task generates a dense, self-supervised signal that compels the model to learn the underlying dynamics of the driving environment. We showcase the paradigm's versatility by instantiating it for two dominant VLA archetypes: an autoregressive world model for VLAs that use discrete visual tokens, and a diffusion world model for those operating on continuous visual features. Building on the rich representations learned from world modeling, we introduce a lightweight action expert to address the inference latency for real-time deployment. Extensive experiments on the NAVSIM v1/v2 benchmark and a 680x larger in-house dataset demonstrate that DriveVLA-W0 significantly outperforms BEV and VLA baselines. \nCrucially, it amplifies the data scaling law, showing that performance gains accelerate as the training dataset size increases.","external_url":"https://arxiv.org/abs/2510.12796","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T04:40:23.821487+00:00","pith_arxiv_id":"2510.12796","created_at":"2026-05-10T05:25:54.490413+00:00","updated_at":"2026-05-25T04:40:23.821487+00:00","title_quality_ok":true,"display_title":"DriveVLA-W0: World Models Amplify Data Scaling Law in Autonomous Driving","render_title":"DriveVLA-W0: World Models Amplify Data Scaling Law in Autonomous Driving"},"hub":{"state":{"work_id":"c356b2a8-eaa6-476d-b6d8-8680bb5ddff8","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":22,"external_cited_by_count":null,"distinct_field_count":3,"first_pith_cited_at":"2025-11-28T17:17:38+00:00","last_pith_cited_at":"2026-05-22T06:17:35+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-27T16:07:51.161800+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":6},{"context_role":"baseline","n":4}],"polarity_counts":[{"context_polarity":"background","n":6},{"context_polarity":"baseline","n":4}],"runs":{},"summary":{},"graph":{},"authors":[]}}