{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:5H4KVUGIPKVAJVT32AXALKNIER","short_pith_number":"pith:5H4KVUGI","schema_version":"1.0","canonical_sha256":"e9f8aad0c87aaa04d67bd02e05a9a8244cd57b316f12f1e34698df46bb8038f3","source":{"kind":"arxiv","id":"2606.15932","version":2},"attestation_state":"computed","paper":{"title":"Beyond NL2Code: A Structured Survey of Multimodal Code Intelligence","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Haibo Qiu, Haoyue Yang, Jian Hu, Jing Huang, Jingyu Xiao, Jinhe Bi, Lei Chen, Lei Jiang, Peng Shi, Qiaosheng Chen, Qiushi Sun, Shuai Fu, Siqi Yang, Xianzhen Luo, Xuanle Zhao, Xuexin Liu, Yufeng Zhong, Zhenlin Wei, Zhixiong Zeng","submitted_at":"2026-06-14T17:21:43Z","abstract_excerpt":"While Large Language Models (LLMs) have substantially advanced text-to-code synthesis, many real programming tasks specify intent through visual artifacts such as screenshots, charts, vector drawings, videos, and interactive states. These tasks require models to connect visual perception to executable programs, because correctness depends not only on syntax but also on layout, data semantics, interaction behavior, and domain-specific constraints that apply after execution. This survey examines Multimodal Code Intelligence, covering systems that generate, edit, refine, or reason with code under"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.15932","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-14T17:21:43Z","cross_cats_sorted":[],"title_canon_sha256":"ea667057c001ff9b5b44c8f1a866b5c9567882fb73fff0585dd0cd176171ddbe","abstract_canon_sha256":"33261d82316a73cd99309782b62630cb6560dfbabbfe9ff23e5a3ae226757008"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:40.091444Z","signature_b64":"8BXTrBNY7Xu8U5uGoeU/D0Q3Kg8PTHTDuvYymjX3RNBkVzgmQCEDci6bBC+Z/0OUliOZtNtySYw9qNibbOrHCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e9f8aad0c87aaa04d67bd02e05a9a8244cd57b316f12f1e34698df46bb8038f3","last_reissued_at":"2026-06-19T16:10:40.090971Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:40.090971Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Beyond NL2Code: A Structured Survey of Multimodal Code Intelligence","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Haibo Qiu, Haoyue Yang, Jian Hu, Jing Huang, Jingyu Xiao, Jinhe Bi, Lei Chen, Lei Jiang, Peng Shi, Qiaosheng Chen, Qiushi Sun, Shuai Fu, Siqi Yang, Xianzhen Luo, Xuanle Zhao, Xuexin Liu, Yufeng Zhong, Zhenlin Wei, Zhixiong Zeng","submitted_at":"2026-06-14T17:21:43Z","abstract_excerpt":"While Large Language Models (LLMs) have substantially advanced text-to-code synthesis, many real programming tasks specify intent through visual artifacts such as screenshots, charts, vector drawings, videos, and interactive states. These tasks require models to connect visual perception to executable programs, because correctness depends not only on syntax but also on layout, data semantics, interaction behavior, and domain-specific constraints that apply after execution. This survey examines Multimodal Code Intelligence, covering systems that generate, edit, refine, or reason with code under"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.15932","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.15932/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.15932","created_at":"2026-06-19T16:10:40.091029+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.15932v2","created_at":"2026-06-19T16:10:40.091029+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.15932","created_at":"2026-06-19T16:10:40.091029+00:00"},{"alias_kind":"pith_short_12","alias_value":"5H4KVUGIPKVA","created_at":"2026-06-19T16:10:40.091029+00:00"},{"alias_kind":"pith_short_16","alias_value":"5H4KVUGIPKVAJVT3","created_at":"2026-06-19T16:10:40.091029+00:00"},{"alias_kind":"pith_short_8","alias_value":"5H4KVUGI","created_at":"2026-06-19T16:10:40.091029+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER","json":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER.json","graph_json":"https://pith.science/api/pith-number/5H4KVUGIPKVAJVT32AXALKNIER/graph.json","events_json":"https://pith.science/api/pith-number/5H4KVUGIPKVAJVT32AXALKNIER/events.json","paper":"https://pith.science/paper/5H4KVUGI"},"agent_actions":{"view_html":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER","download_json":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER.json","view_paper":"https://pith.science/paper/5H4KVUGI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.15932&json=true","fetch_graph":"https://pith.science/api/pith-number/5H4KVUGIPKVAJVT32AXALKNIER/graph.json","fetch_events":"https://pith.science/api/pith-number/5H4KVUGIPKVAJVT32AXALKNIER/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER/action/storage_attestation","attest_author":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER/action/author_attestation","sign_citation":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER/action/citation_signature","submit_replication":"https://pith.science/pith/5H4KVUGIPKVAJVT32AXALKNIER/action/replication_record"}},"created_at":"2026-06-19T16:10:40.091029+00:00","updated_at":"2026-06-19T16:10:40.091029+00:00"}