{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:DKPX4ZHT3M7TF52DEIJLMJPTUT","short_pith_number":"pith:DKPX4ZHT","schema_version":"1.0","canonical_sha256":"1a9f7e64f3db3f32f7432212b625f3a4fabc9be8558ae98ed7417f735b4c2f0c","source":{"kind":"arxiv","id":"2606.06556","version":1},"attestation_state":"computed","paper":{"title":"Robots Need More than VLA and World Models","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Arash Ajoudani, Cesar Cadena, Elis Karcini, Faisal Mehrban, Haitham Bou-Ammar, Jan Peters, Mac Schwager, Marco Hutter, Quang Nguyen","submitted_at":"2026-06-04T10:43:14Z","abstract_excerpt":"Generalist robot intelligence is often framed as a policy-scaling problem: collect more robot demonstrations, train larger Vision-Language-Action (VLA) models, and expect broader generalisation. In this position paper, we argue that this framing is incomplete. The central bottleneck is not only policy learning, but the absence of mechanisms that convert the world's abundant unstructured behavioural data into grounded robot supervision. Human motion, internet video, simulation rollouts, and interactive demonstrations contain rich information about tasks, goals, contacts, failures, and physical "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.06556","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-04T10:43:14Z","cross_cats_sorted":[],"title_canon_sha256":"99b62bacd80a647b96cbb193f8116c4f21b56633d528e5b192c2fbbce45afec8","abstract_canon_sha256":"bdfaf80bf328049eac64436e8807d8b23261e0b2df0ff6c3546275dc33c320ad"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T00:03:44.347545Z","signature_b64":"QpxxTJR8idriH/9blguPndzrbxfP5HBKZYfCmcVBLbSGO+wgsHU2FYqEjBP85BXDFjxugUsx1N3TG1UxA461Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1a9f7e64f3db3f32f7432212b625f3a4fabc9be8558ae98ed7417f735b4c2f0c","last_reissued_at":"2026-06-08T00:03:44.346764Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T00:03:44.346764Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Robots Need More than VLA and World Models","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Arash Ajoudani, Cesar Cadena, Elis Karcini, Faisal Mehrban, Haitham Bou-Ammar, Jan Peters, Mac Schwager, Marco Hutter, Quang Nguyen","submitted_at":"2026-06-04T10:43:14Z","abstract_excerpt":"Generalist robot intelligence is often framed as a policy-scaling problem: collect more robot demonstrations, train larger Vision-Language-Action (VLA) models, and expect broader generalisation. In this position paper, we argue that this framing is incomplete. The central bottleneck is not only policy learning, but the absence of mechanisms that convert the world's abundant unstructured behavioural data into grounded robot supervision. Human motion, internet video, simulation rollouts, and interactive demonstrations contain rich information about tasks, goals, contacts, failures, and physical "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06556","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.06556/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.06556","created_at":"2026-06-08T00:03:44.346889+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.06556v1","created_at":"2026-06-08T00:03:44.346889+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06556","created_at":"2026-06-08T00:03:44.346889+00:00"},{"alias_kind":"pith_short_12","alias_value":"DKPX4ZHT3M7T","created_at":"2026-06-08T00:03:44.346889+00:00"},{"alias_kind":"pith_short_16","alias_value":"DKPX4ZHT3M7TF52D","created_at":"2026-06-08T00:03:44.346889+00:00"},{"alias_kind":"pith_short_8","alias_value":"DKPX4ZHT","created_at":"2026-06-08T00:03:44.346889+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT","json":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT.json","graph_json":"https://pith.science/api/pith-number/DKPX4ZHT3M7TF52DEIJLMJPTUT/graph.json","events_json":"https://pith.science/api/pith-number/DKPX4ZHT3M7TF52DEIJLMJPTUT/events.json","paper":"https://pith.science/paper/DKPX4ZHT"},"agent_actions":{"view_html":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT","download_json":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT.json","view_paper":"https://pith.science/paper/DKPX4ZHT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.06556&json=true","fetch_graph":"https://pith.science/api/pith-number/DKPX4ZHT3M7TF52DEIJLMJPTUT/graph.json","fetch_events":"https://pith.science/api/pith-number/DKPX4ZHT3M7TF52DEIJLMJPTUT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT/action/storage_attestation","attest_author":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT/action/author_attestation","sign_citation":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT/action/citation_signature","submit_replication":"https://pith.science/pith/DKPX4ZHT3M7TF52DEIJLMJPTUT/action/replication_record"}},"created_at":"2026-06-08T00:03:44.346889+00:00","updated_at":"2026-06-08T00:03:44.346889+00:00"}