{"total":12,"items":[{"citing_arxiv_id":"2605.12836","ref_index":2,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Discrete Stochastic Localization for Non-autoregressive Generation","primary_cat":"cs.LG","submitted_at":"2026-05-13T00:12:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Discrete Stochastic Localization provides a continuous-state framework with SNR-invariant denoisers on unit-sphere embeddings, enabling one network to support multiple per-token noise paths and improving MAUVE on OpenWebText.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10938","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"ELF: Embedded Language Flows","primary_cat":"cs.CL","submitted_at":"2026-05-11T17:59:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ELF is a continuous embedding-space flow matching model for language that stays continuous until the last step and outperforms prior discrete and continuous diffusion language models with fewer sampling steps.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10518","ref_index":3,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Infinite Mask Diffusion for Few-Step Distillation","primary_cat":"cs.CL","submitted_at":"2026-05-11T13:07:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Infinite Mask Diffusion Models use stochastic infinite-state masks to overcome the factorization error lower bound in standard masked diffusion, achieving superior few-step performance on language tasks via distillation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07748","ref_index":3,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"TextLDM: Language Modeling with Continuous Latent Diffusion","primary_cat":"cs.CL","submitted_at":"2026-05-08T13:54:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TextLDM applies DiT-style latent diffusion with flow matching to language modeling via a REPA-aligned VAE, outperforming prior diffusion LMs and matching GPT-2 when trained from scratch on OpenWebText2.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06548","ref_index":21,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Continuous Latent Diffusion Language Model","primary_cat":"cs.CL","submitted_at":"2026-05-07T16:44:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Cola DLM proposes a hierarchical latent diffusion model that learns a text-to-latent mapping, fits a global semantic prior in continuous space with a block-causal DiT, and performs conditional decoding, establishing latent prior modeling as an alternative to token-level autoregressive language model","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01373","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Focus on the 
Core: Empowering Diffusion Large Language Models by Self-Contrast","primary_cat":"cs.CL","submitted_at":"2026-05-02T10:46:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"FoCore uses self-contrast on early-converging high-density tokens to boost diffusion LLM quality on reasoning benchmarks while cutting decoding steps by over 2x.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00161","ref_index":46,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Consistent Diffusion Language Models","primary_cat":"cs.LG","submitted_at":"2026-04-30T19:31:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CDLM trains denoisers to be path-invariant across stochastic posterior bridges in discrete diffusion, unifying prior methods and achieving new SOTA few-step text generation performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.13413","ref_index":8,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Dataset-Level Metrics Attenuate Non-Determinism: A Fine-Grained Non-Determinism Evaluation in Diffusion Language Models","primary_cat":"cs.LG","submitted_at":"2026-04-15T02:31:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Dataset-level metrics in diffusion language models mask substantial sample-level non-determinism that varies with model and system factors, which a new Factor Variance Attribution metric can decompose.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.11748","ref_index":7,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"LangFlow: Continuous Diffusion Rivals Discrete in Language Modeling","primary_cat":"cs.CL","submitted_at":"2026-04-13T17:21:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LangFlow is the first continuous diffusion language model to rival discrete diffusion on perplexity and generative perplexity while exceeding autoregressive baselines on several zero-shot tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.05497","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models","primary_cat":"cs.AI","submitted_at":"2026-04-07T06:41:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Position and step penalty plus visual reasoning guidance fix premature answering and weak visual grounding in diffusion MLLMs, delivering up to 7.5% accuracy gains and over 3x speedup.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2508.15487","ref_index":6,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Dream 7B: Diffusion Large Language Models","primary_cat":"cs.CL","submitted_at":"2025-08-21T12:09:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Dream 7B is a 
7B diffusion LLM that refines sequences in parallel via denoising and outperforms prior diffusion models on general, mathematical, and coding benchmarks with added flexibility in generation order and quality-speed tradeoffs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2502.09992","ref_index":47,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Large Language Diffusion Models","primary_cat":"cs.CL","submitted_at":"2025-02-14T08:23:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"LLaDA is a scalable diffusion-based language model that matches autoregressive LLMs like LLaMA3 8B on tasks and surpasses GPT-4o on reversal poem completion.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}
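
For reference, a minimal sketch of consuming a page in this shape, assuming the payload above has been saved locally as citations.json (a hypothetical filename; the producing API and its client are not specified here). It loads the page, reports the pagination fields (total, limit, offset), and prints the records ranked by novelty_score:

    import json

    # Assumption: the JSON payload above has been saved to citations.json.
    with open("citations.json") as f:
        page = json.load(f)

    # total/limit/offset describe pagination; items holds the citation records.
    print(f"{page['total']} citing papers (offset {page['offset']}, limit {page['limit']})")

    # Rank this page by novelty_score, highest first, and print a compact digest.
    for item in sorted(page["items"], key=lambda r: r["novelty_score"], reverse=True):
        print(f"{item['citing_arxiv_id']}  [{item['primary_cat']}]  "
              f"novelty={item['novelty_score']:.1f}  {item['paper_title']}")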