{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:UVOY7HFKH7RDIN3ZJ6L6WKCE7Q","short_pith_number":"pith:UVOY7HFK","schema_version":"1.0","canonical_sha256":"a55d8f9caa3fe23437794f97eb2844fc1211d6ee4079c2aead860e7f7b69518a","source":{"kind":"arxiv","id":"2606.02735","version":1},"attestation_state":"computed","paper":{"title":"See Less, Specify More: Visual Evidence Budgets for Generalizable VLAs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.RO","authors_text":"Kei Ota, Tatsuya Matsushima, Yueh-Hua Wu","submitted_at":"2026-06-01T18:02:07Z","abstract_excerpt":"Generalization remains a central bottleneck for vision-language-action (VLA) models: under distractors, appearance shifts, and semantically similar tasks, the policy must often infer local execution details from coarse instructions while also deciding which parts of the image matter for control. We present S2 (See Less, Specify More), a framework for improving VLA generalization by training the executor under a cleaner interface.\n  Specify More preserves the original instruction as a stable high-level goal while relabeling each trajectory into refined trajectory- and subtask-level language tha"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.02735","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-01T18:02:07Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"d1acd74a24b24fe9c631b7e0c69ad6cf5600416411c735041c2b18e75d487a71","abstract_canon_sha256":"1ced4821491dd2e57d352cde00766e4d0b3373b8823ab70e2ad617bc9580254b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T00:05:08.346867Z","signature_b64":"AxYcprlZ3gt+jg0R4j/lL02nME5vT+1YZPeZn49xPNlEnQpXBP8R6/HHkGVXVkTaLL7ChOwKYeBb058Pn02sBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a55d8f9caa3fe23437794f97eb2844fc1211d6ee4079c2aead860e7f7b69518a","last_reissued_at":"2026-06-03T00:05:08.346465Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T00:05:08.346465Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"See Less, Specify More: Visual Evidence Budgets for Generalizable VLAs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.RO","authors_text":"Kei Ota, Tatsuya Matsushima, Yueh-Hua Wu","submitted_at":"2026-06-01T18:02:07Z","abstract_excerpt":"Generalization remains a central bottleneck for vision-language-action (VLA) models: under distractors, appearance shifts, and semantically similar tasks, the policy must often infer local execution details from coarse instructions while also deciding which parts of the image matter for control. We present S2 (See Less, Specify More), a framework for improving VLA generalization by training the executor under a cleaner interface.\n  Specify More preserves the original instruction as a stable high-level goal while relabeling each trajectory into refined trajectory- and subtask-level language tha"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.02735","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.02735/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.02735","created_at":"2026-06-03T00:05:08.346519+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.02735v1","created_at":"2026-06-03T00:05:08.346519+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.02735","created_at":"2026-06-03T00:05:08.346519+00:00"},{"alias_kind":"pith_short_12","alias_value":"UVOY7HFKH7RD","created_at":"2026-06-03T00:05:08.346519+00:00"},{"alias_kind":"pith_short_16","alias_value":"UVOY7HFKH7RDIN3Z","created_at":"2026-06-03T00:05:08.346519+00:00"},{"alias_kind":"pith_short_8","alias_value":"UVOY7HFK","created_at":"2026-06-03T00:05:08.346519+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q","json":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q.json","graph_json":"https://pith.science/api/pith-number/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/graph.json","events_json":"https://pith.science/api/pith-number/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/events.json","paper":"https://pith.science/paper/UVOY7HFK"},"agent_actions":{"view_html":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q","download_json":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q.json","view_paper":"https://pith.science/paper/UVOY7HFK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.02735&json=true","fetch_graph":"https://pith.science/api/pith-number/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/graph.json","fetch_events":"https://pith.science/api/pith-number/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/action/storage_attestation","attest_author":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/action/author_attestation","sign_citation":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/action/citation_signature","submit_replication":"https://pith.science/pith/UVOY7HFKH7RDIN3ZJ6L6WKCE7Q/action/replication_record"}},"created_at":"2026-06-03T00:05:08.346519+00:00","updated_at":"2026-06-03T00:05:08.346519+00:00"}