{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:6VHA5ZYDD2XLVMR2INZZKXRP4O","short_pith_number":"pith:6VHA5ZYD","schema_version":"1.0","canonical_sha256":"f54e0ee7031eaebab23a4373955e2fe386e04b4902e9d11b2dad624dac22c968","source":{"kind":"arxiv","id":"2605.14368","version":1},"attestation_state":"computed","paper":{"title":"Where Should Diffusion Enter a Language Model? Geometry-Guided Hidden-State Replacement","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Geometry-based proxies on hidden states identify shallow layers where a diffusion bridge can replace the lower prefix of a pretrained transformer while recovering the hidden state rather than tokens.","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Hyoungjoon Lee, Injin Kong, Yohan Jo","submitted_at":"2026-05-14T04:47:54Z","abstract_excerpt":"Continuous diffusion language models lag behind autoregressive transformers, partly because diffusion is applied in spaces poorly suited to language denoising and token recovery. We propose DiHAL, a geometry-guided diffusion-transformer hybrid that asks where diffusion should enter a pretrained transformer. DiHAL scores layers with geometry-based proxies, selects a diffusion-friendly hidden-state interface, and replaces the lower transformer prefix with a diffusion bridge while retaining the upper layers and original LM head. By reconstructing the selected-layer hidden state rather than tokens"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.14368","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T04:47:54Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"0c11d0fdd6459862a3b7f9442ae7194fff49d5051e7b403f233bd6554c4db1d3","abstract_canon_sha256":"d0ad9859bf8621c515bf8fca927e5607f1a53b99b20166c1966626f3c957f736"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:07.862911Z","signature_b64":"hEfEFdploKjtGDEcZglSlU57uOsejozlwfSVVTzBQgrI8BmPKzj0UEuPLLXhaVRi6Km/lUR5a4KGY0dJ+HhTBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f54e0ee7031eaebab23a4373955e2fe386e04b4902e9d11b2dad624dac22c968","last_reissued_at":"2026-05-17T23:39:07.862219Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:07.862219Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Where Should Diffusion Enter a Language Model? Geometry-Guided Hidden-State Replacement","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Geometry-based proxies on hidden states identify shallow layers where a diffusion bridge can replace the lower prefix of a pretrained transformer while recovering the hidden state rather than tokens.","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Hyoungjoon Lee, Injin Kong, Yohan Jo","submitted_at":"2026-05-14T04:47:54Z","abstract_excerpt":"Continuous diffusion language models lag behind autoregressive transformers, partly because diffusion is applied in spaces poorly suited to language denoising and token recovery. We propose DiHAL, a geometry-guided diffusion-transformer hybrid that asks where diffusion should enter a pretrained transformer. DiHAL scores layers with geometry-based proxies, selects a diffusion-friendly hidden-state interface, and replaces the lower transformer prefix with a diffusion bridge while retaining the upper layers and original LM head. By reconstructing the selected-layer hidden state rather than tokens"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments on 8B-scale backbones show that the geometry score predicts effective shallow insertion layers under a fixed bridge-training protocol and that hidden-state recovery improves over continuous diffusion baselines in a diagnostic comparison matching the diffusion/recovery training budget.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That geometry-based proxies computed on pretrained hidden states reliably identify layers where a diffusion bridge can be inserted without extensive additional validation or retraining of the upper layers.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"DiHAL uses geometry proxies to pick where to replace the lower layers of a pretrained transformer with a diffusion bridge for hidden-state reconstruction, improving over token-level diffusion baselines on 8B models.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Geometry-based proxies on hidden states identify shallow layers where a diffusion bridge can replace the lower prefix of a pretrained transformer while recovering the hidden state rather than tokens.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"9df8a31375a1ec23135b2435ac0a473cb0944b022e2b98f766a2aa1b9182b7c7"},"source":{"id":"2605.14368","kind":"arxiv","version":1},"verdict":{"id":"e3146f0d-c9ff-4c99-996b-be8f193336aa","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T02:35:13.035897Z","strongest_claim":"Experiments on 8B-scale backbones show that the geometry score predicts effective shallow insertion layers under a fixed bridge-training protocol and that hidden-state recovery improves over continuous diffusion baselines in a diagnostic comparison matching the diffusion/recovery training budget.","one_line_summary":"DiHAL uses geometry proxies to pick where to replace the lower layers of a pretrained transformer with a diffusion bridge for hidden-state reconstruction, improving over token-level diffusion baselines on 8B models.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That geometry-based proxies computed on pretrained hidden states reliably identify layers where a diffusion bridge can be inserted without extensive additional validation or retraining of the upper layers.","pith_extraction_headline":"Geometry-based proxies on hidden states identify shallow layers where a diffusion bridge can replace the lower prefix of a pretrained transformer while recovering the hidden state rather than tokens."},"references":{"count":49,"sample":[{"doi":"","year":2020,"title":"Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and Girish Sastry and Amanda Askell and Sandhini Agarwa","work_id":"83c1a009-e906-464b-90c6-2717b5da3188","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Advances in Neural Information Processing Systems , editor=","work_id":"7616cafc-2b18-4803-b853-9dcba19e56e4","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Qwen3 Technical Report , author=. 2025 , eprint=","work_id":"26c7b6ed-f86e-4ed8-b9ed-b1783d90255b","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"International Conference on Learning Representations , year=","work_id":"344a95e2-ee2d-4b5d-ae3c-c00751e46b03","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Scaling Laws for Diffusion Transformers , author=. 2025 , url=","work_id":"340be762-8861-4b1f-bf9b-5fdaf6c36c86","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":49,"snapshot_sha256":"8ef2c1c602fadc4d1d03af9cc00e41fa1cf54091c83d9fd0a7a5b5f2cf8a0094","internal_anchors":1},"formal_canon":{"evidence_count":2,"snapshot_sha256":"030d58762d6afadd51ede534798f77376911f8d1c74a1bb403bb804d9c4529c0"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.14368","created_at":"2026-05-17T23:39:07.862341+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.14368v1","created_at":"2026-05-17T23:39:07.862341+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14368","created_at":"2026-05-17T23:39:07.862341+00:00"},{"alias_kind":"pith_short_12","alias_value":"6VHA5ZYDD2XL","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"6VHA5ZYDD2XLVMR2","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"6VHA5ZYD","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2606.08810","citing_title":"Continuous Language Diffusion as a Decoder-Interface Problem","ref_index":31,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O","json":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O.json","graph_json":"https://pith.science/api/pith-number/6VHA5ZYDD2XLVMR2INZZKXRP4O/graph.json","events_json":"https://pith.science/api/pith-number/6VHA5ZYDD2XLVMR2INZZKXRP4O/events.json","paper":"https://pith.science/paper/6VHA5ZYD"},"agent_actions":{"view_html":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O","download_json":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O.json","view_paper":"https://pith.science/paper/6VHA5ZYD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.14368&json=true","fetch_graph":"https://pith.science/api/pith-number/6VHA5ZYDD2XLVMR2INZZKXRP4O/graph.json","fetch_events":"https://pith.science/api/pith-number/6VHA5ZYDD2XLVMR2INZZKXRP4O/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O/action/storage_attestation","attest_author":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O/action/author_attestation","sign_citation":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O/action/citation_signature","submit_replication":"https://pith.science/pith/6VHA5ZYDD2XLVMR2INZZKXRP4O/action/replication_record"}},"created_at":"2026-05-17T23:39:07.862341+00:00","updated_at":"2026-05-17T23:39:07.862341+00:00"}