{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:26HI5JTCZSCNBSUVPC6MD4G5XZ","short_pith_number":"pith:26HI5JTC","canonical_record":{"source":{"id":"2605.10347","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-11T10:49:31Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"d5258255c7a409ed66fd43b173ae371c09d053c3ac3028f02a7c29c74b7a6606","abstract_canon_sha256":"2f9dda26a762881c0544da5ce8f68c69067ceb15e3a83e61ca4ef98185f45e4e"},"schema_version":"1.0"},"canonical_sha256":"d78e8ea662cc84d0ca9578bcc1f0ddbe42bed3c7778f4bcf39f62d27ea5c2ee3","source":{"kind":"arxiv","id":"2605.10347","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.10347","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"arxiv_version","alias_value":"2605.10347v2","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.10347","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_12","alias_value":"26HI5JTCZSCN","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_16","alias_value":"26HI5JTCZSCNBSUV","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_8","alias_value":"26HI5JTC","created_at":"2026-05-25T02:01:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:26HI5JTCZSCNBSUVPC6MD4G5XZ","target":"record","payload":{"canonical_record":{"source":{"id":"2605.10347","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-11T10:49:31Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"d5258255c7a409ed66fd43b173ae371c09d053c3ac3028f02a7c29c74b7a6606","abstract_canon_sha256":"2f9dda26a762881c0544da5ce8f68c69067ceb15e3a83e61ca4ef98185f45e4e"},"schema_version":"1.0"},"canonical_sha256":"d78e8ea662cc84d0ca9578bcc1f0ddbe42bed3c7778f4bcf39f62d27ea5c2ee3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:22.922554Z","signature_b64":"RmK8uHtpzRXqUO2UqcelViZ0o5x8djL0OsUxljkMS+TMaH3gy2KEJoti+Zlgas4l9Oqm7AVzuSoaucKlH0yvCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d78e8ea662cc84d0ca9578bcc1f0ddbe42bed3c7778f4bcf39f62d27ea5c2ee3","last_reissued_at":"2026-05-25T02:01:22.921902Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:22.921902Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.10347","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4T2/p7HNYMWdMH9MP3ZMPadnPHciJq1BBomZVTsSOgJ3zOB3XhZMKTx1LPaS5KIRbqpE/iyGbVp5GztS2nNaCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T18:23:59.050780Z"},"content_sha256":"d81ae46a8a04bea4de6393bf1cf2c447935dd3916e34618e6bbc0a571b6c2054","schema_version":"1.0","event_id":"sha256:d81ae46a8a04bea4de6393bf1cf2c447935dd3916e34618e6bbc0a571b6c2054"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:26HI5JTCZSCNBSUVPC6MD4G5XZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"How Mobile World Model Guides GUI Agents?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"World models improve mobile GUI agent performance as training supervision but show limited value in post-hoc self-reflection for overconfident agents.","cross_cats":["cs.CL"],"primary_cat":"cs.AI","authors_text":"Bo An, Heng Qu, Jian Luan, Jiaxing Li, Kun Huang, Pengzhi Gao, Weikai Xu, Wei Liu, Xiaolin Hu, Yuhan Chen, Yunren Feng, Yuxuan Liu, Zhizheng Jiang","submitted_at":"2026-05-11T10:49:31Z","abstract_excerpt":"Recent advances in vision-language models have enabled mobile GUI agents to perceive visual interfaces and execute user instructions, but reliable prediction of action consequences remains critical for long-horizon and high-risk interactions. Existing mobile world models provide either text-based or image-based future states, yet it remains unclear which representation is useful, whether generated rollouts can replace real environments, and how test-time guidance helps agents of different strengths. To answer the above questions, we filter and annotate mobile world-model data, then train world"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"world-model-generated trajectories can provide transferable interaction experience in the training process and improve agents' end-to-end task performance, although these data do not preserve the original distribution; for overconfident mobile agents with low action entropy, posterior self-reflection provides limited gains, suggesting that world models are more effective as prior perception or training supervision than as universal post-hoc verifiers.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the downstream evaluations on AITZ, AndroidControl, and AndroidWorld, together with the chosen agent strengths and entropy measures, isolate the contribution of the world models without confounding effects from data filtering choices or benchmark construction.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Mobile world models in text, image, and code modalities reach state-of-the-art on their benchmarks and improve downstream GUI agent performance, with code best for in-distribution accuracy and text more robust for out-of-distribution use.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"World models improve mobile GUI agent performance as training supervision but show limited value in post-hoc self-reflection for overconfident agents.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"7dd947bf28a582cbb18099598d3e31dcee879af2032dd7d9dbb904d4d0cd55fc"},"source":{"id":"2605.10347","kind":"arxiv","version":2},"verdict":{"id":"a900abe1-8dcb-442c-82b3-adb5adebca91","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-12T04:23:32.673720Z","strongest_claim":"world-model-generated trajectories can provide transferable interaction experience in the training process and improve agents' end-to-end task performance, although these data do not preserve the original distribution; for overconfident mobile agents with low action entropy, posterior self-reflection provides limited gains, suggesting that world models are more effective as prior perception or training supervision than as universal post-hoc verifiers.","one_line_summary":"Mobile world models in text, image, and code modalities reach state-of-the-art on their benchmarks and improve downstream GUI agent performance, with code best for in-distribution accuracy and text more robust for out-of-distribution use.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the downstream evaluations on AITZ, AndroidControl, and AndroidWorld, together with the chosen agent strengths and entropy measures, isolate the contribution of the world models without confounding effects from data filtering choices or benchmark construction.","pith_extraction_headline":"World models improve mobile GUI agent performance as training supervision but show limited value in post-hoc self-reflection for overconfident agents."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.10347/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-20T06:02:01.143802Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T15:35:32.867836Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T11:31:18.689834Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T09:25:38.246111Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"901f033203233229e327902b473b7eb29f2cff5f8810eb258193c9a81a0d84a2"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"546288c7476d170e2b75588368ecc97f9bce5afc6d8288c01546fd2ac106af8d"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"a900abe1-8dcb-442c-82b3-adb5adebca91"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3Hyv0IWQa60b8wzW61Q1mqoQ4WKTu6QojdVRsNEsiqyz+Zao92Z1VhWMbcXidDKRu/HwzlxIzRpjVD249rn1Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T18:23:59.051853Z"},"content_sha256":"5e84f1eb20ff7cf7cb200fa26d1599e968736f8a9dacc1910ad4f08ab097e72e","schema_version":"1.0","event_id":"sha256:5e84f1eb20ff7cf7cb200fa26d1599e968736f8a9dacc1910ad4f08ab097e72e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/26HI5JTCZSCNBSUVPC6MD4G5XZ/bundle.json","state_url":"https://pith.science/pith/26HI5JTCZSCNBSUVPC6MD4G5XZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/26HI5JTCZSCNBSUVPC6MD4G5XZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T18:23:59Z","links":{"resolver":"https://pith.science/pith/26HI5JTCZSCNBSUVPC6MD4G5XZ","bundle":"https://pith.science/pith/26HI5JTCZSCNBSUVPC6MD4G5XZ/bundle.json","state":"https://pith.science/pith/26HI5JTCZSCNBSUVPC6MD4G5XZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/26HI5JTCZSCNBSUVPC6MD4G5XZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:26HI5JTCZSCNBSUVPC6MD4G5XZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2f9dda26a762881c0544da5ce8f68c69067ceb15e3a83e61ca4ef98185f45e4e","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-11T10:49:31Z","title_canon_sha256":"d5258255c7a409ed66fd43b173ae371c09d053c3ac3028f02a7c29c74b7a6606"},"schema_version":"1.0","source":{"id":"2605.10347","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.10347","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"arxiv_version","alias_value":"2605.10347v2","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.10347","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_12","alias_value":"26HI5JTCZSCN","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_16","alias_value":"26HI5JTCZSCNBSUV","created_at":"2026-05-25T02:01:22Z"},{"alias_kind":"pith_short_8","alias_value":"26HI5JTC","created_at":"2026-05-25T02:01:22Z"}],"graph_snapshots":[{"event_id":"sha256:5e84f1eb20ff7cf7cb200fa26d1599e968736f8a9dacc1910ad4f08ab097e72e","target":"graph","created_at":"2026-05-25T02:01:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"world-model-generated trajectories can provide transferable interaction experience in the training process and improve agents' end-to-end task performance, although these data do not preserve the original distribution; for overconfident mobile agents with low action entropy, posterior self-reflection provides limited gains, suggesting that world models are more effective as prior perception or training supervision than as universal post-hoc verifiers."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the downstream evaluations on AITZ, AndroidControl, and AndroidWorld, together with the chosen agent strengths and entropy measures, isolate the contribution of the world models without confounding effects from data filtering choices or benchmark construction."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Mobile world models in text, image, and code modalities reach state-of-the-art on their benchmarks and improve downstream GUI agent performance, with code best for in-distribution accuracy and text more robust for out-of-distribution use."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"World models improve mobile GUI agent performance as training supervision but show limited value in post-hoc self-reflection for overconfident agents."}],"snapshot_sha256":"7dd947bf28a582cbb18099598d3e31dcee879af2032dd7d9dbb904d4d0cd55fc"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"546288c7476d170e2b75588368ecc97f9bce5afc6d8288c01546fd2ac106af8d"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-20T06:02:01.143802Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T15:35:32.867836Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T11:31:18.689834Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T09:25:38.246111Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.10347/integrity.json","findings":[],"snapshot_sha256":"901f033203233229e327902b473b7eb29f2cff5f8810eb258193c9a81a0d84a2","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent advances in vision-language models have enabled mobile GUI agents to perceive visual interfaces and execute user instructions, but reliable prediction of action consequences remains critical for long-horizon and high-risk interactions. Existing mobile world models provide either text-based or image-based future states, yet it remains unclear which representation is useful, whether generated rollouts can replace real environments, and how test-time guidance helps agents of different strengths. To answer the above questions, we filter and annotate mobile world-model data, then train world","authors_text":"Bo An, Heng Qu, Jian Luan, Jiaxing Li, Kun Huang, Pengzhi Gao, Weikai Xu, Wei Liu, Xiaolin Hu, Yuhan Chen, Yunren Feng, Yuxuan Liu, Zhizheng Jiang","cross_cats":["cs.CL"],"headline":"World models improve mobile GUI agent performance as training supervision but show limited value in post-hoc self-reflection for overconfident agents.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-11T10:49:31Z","title":"How Mobile World Model Guides GUI Agents?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.10347","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-12T04:23:32.673720Z","id":"a900abe1-8dcb-442c-82b3-adb5adebca91","model_set":{"reader":"grok-4.3"},"one_line_summary":"Mobile world models in text, image, and code modalities reach state-of-the-art on their benchmarks and improve downstream GUI agent performance, with code best for in-distribution accuracy and text more robust for out-of-distribution use.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"World models improve mobile GUI agent performance as training supervision but show limited value in post-hoc self-reflection for overconfident agents.","strongest_claim":"world-model-generated trajectories can provide transferable interaction experience in the training process and improve agents' end-to-end task performance, although these data do not preserve the original distribution; for overconfident mobile agents with low action entropy, posterior self-reflection provides limited gains, suggesting that world models are more effective as prior perception or training supervision than as universal post-hoc verifiers.","weakest_assumption":"That the downstream evaluations on AITZ, AndroidControl, and AndroidWorld, together with the chosen agent strengths and entropy measures, isolate the contribution of the world models without confounding effects from data filtering choices or benchmark construction."}},"verdict_id":"a900abe1-8dcb-442c-82b3-adb5adebca91"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d81ae46a8a04bea4de6393bf1cf2c447935dd3916e34618e6bbc0a571b6c2054","target":"record","created_at":"2026-05-25T02:01:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2f9dda26a762881c0544da5ce8f68c69067ceb15e3a83e61ca4ef98185f45e4e","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-11T10:49:31Z","title_canon_sha256":"d5258255c7a409ed66fd43b173ae371c09d053c3ac3028f02a7c29c74b7a6606"},"schema_version":"1.0","source":{"id":"2605.10347","kind":"arxiv","version":2}},"canonical_sha256":"d78e8ea662cc84d0ca9578bcc1f0ddbe42bed3c7778f4bcf39f62d27ea5c2ee3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d78e8ea662cc84d0ca9578bcc1f0ddbe42bed3c7778f4bcf39f62d27ea5c2ee3","first_computed_at":"2026-05-25T02:01:22.921902Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:01:22.921902Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RmK8uHtpzRXqUO2UqcelViZ0o5x8djL0OsUxljkMS+TMaH3gy2KEJoti+Zlgas4l9Oqm7AVzuSoaucKlH0yvCQ==","signature_status":"signed_v1","signed_at":"2026-05-25T02:01:22.922554Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.10347","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d81ae46a8a04bea4de6393bf1cf2c447935dd3916e34618e6bbc0a571b6c2054","sha256:5e84f1eb20ff7cf7cb200fa26d1599e968736f8a9dacc1910ad4f08ab097e72e"],"state_sha256":"40cd09a5dbe2f5a9ca52ae061393a2b1b8aeced7323670fa08c741848165a24f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"quQWbxJnUzAdeNLP5PtLm4XmNikz4fGNnBVinrR4hKqjlcDVVEuRDcdQeG63MmJj4HTvArwzvRaJZ0oj8HgeAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T18:23:59.056336Z","bundle_sha256":"4e234f7bc39fdf616f41a708693a77e1fe67f5534411daa693a243d283fb1eeb"}}