{"work":{"id":"09ba463d-6377-4017-9801-444ffb94b056","openalex_id":null,"doi":null,"arxiv_id":"2409.12186","raw_key":null,"title":"Qwen2.5-Coder Technical Report","authors":null,"authors_text":"Binyuan Hui, Jian Yang, Zeyu Cui, Jiaxi Yang, Dayiheng Liu, Lei Zhang","year":2024,"venue":"cs.CL","abstract":"In this report, we introduce the Qwen2.5-Coder series, a significant upgrade from its predecessor, CodeQwen1.5. This series includes six models: Qwen2.5-Coder-(0.5B/1.5B/3B/7B/14B/32B). As a code-specific model, Qwen2.5-Coder is built upon the Qwen2.5 architecture and continues pretrained on a vast corpus of over 5.5 trillion tokens. Through meticulous data cleaning, scalable synthetic data generation, and balanced data mixing, Qwen2.5-Coder demonstrates impressive code generation capabilities while retaining general and math skills. These models have been evaluated on a wide range of code-related tasks, achieving state-of-the-art (SOTA) performance across more than 10 benchmarks, including code generation, completion, reasoning, and repair, consistently outperforming larger models of the same model size. We believe that the release of the Qwen2.5-Coder series will advance research in code intelligence and, with its permissive licensing, support wider adoption by developers in real-world applications.","external_url":"https://arxiv.org/abs/2409.12186","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-14T21:53:02.268987+00:00","pith_arxiv_id":"2409.12186","created_at":"2026-05-08T17:28:42.025770+00:00","updated_at":"2026-05-14T21:53:02.268987+00:00","title_quality_ok":false,"display_title":"Qwen2.5-Coder Technical Report","render_title":"Qwen2.5-Coder Technical Report"},"hub":{"state":{"work_id":"09ba463d-6377-4017-9801-444ffb94b056","tier":"super_hub","tier_reason":"100+ Pith inbound or 10,000+ external citations","pith_inbound_count":127,"external_cited_by_count":null,"distinct_field_count":13,"first_pith_cited_at":"2024-06-01T17:48:15+00:00","last_pith_cited_at":"2026-05-13T13:47:03+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-14T21:56:14.438896+00:00","tier_text":"super_hub"},"tier":"super_hub","role_counts":[],"polarity_counts":[],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"Qwen2.5-Coder Technical Report","claims":[{"claim_text":"In this report, we introduce the Qwen2.5-Coder series, a significant upgrade from its predecessor, CodeQwen1.5. This series includes six models: Qwen2.5-Coder-(0.5B/1.5B/3B/7B/14B/32B). As a code-specific model, Qwen2.5-Coder is built upon the Qwen2.5 architecture and continues pretrained on a vast corpus of over 5.5 trillion tokens. Through meticulous data cleaning, scalable synthetic data generation, and balanced data mixing, Qwen2.5-Coder demonstrates impressive code generation capabilities while retaining general and math skills. 
These models have been evaluated on a wide range of code-rel","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Qwen2.5-Coder Technical Report because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T01:24:17.888597+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[{"id":"785c6603-fbf9-4a35-bfb2-686be03040d6","orcid":null,"display_name":"Binyuan Hui"},{"id":"28b4dae0-f5fc-4fdd-952e-8d170764a5ba","orcid":null,"display_name":"Jian Yang"},{"id":"2347b273-9b5e-459d-b285-0b0658cd9167","orcid":null,"display_name":"Zeyu Cui"},{"id":"12c909f3-88dc-418c-b7ce-afbc5b61f374","orcid":null,"display_name":"Jiaxi Yang"},{"id":"1c6b3eab-22ec-4e62-823a-ee82c33ecb41","orcid":null,"display_name":"Dayiheng Liu"},{"id":"c354673e-74a5-4633-a59b-91d77cccd8aa","orcid":null,"display_name":"Lei Zhang"}]},"error":null,"updated_at":"2026-05-14T01:24:02.526420+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T01:24:09.165495+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":42},{"title":"Code Llama: Open Foundation Models for Code","work_id":"e73bffa4-7620-47ac-9327-259a60db52ca","shared_citers":33},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":32},{"title":"DeepSeek-Coder: When the Large Language Model Meets Programming -- The Rise of Code Intelligence","work_id":"f22dae5a-27e2-41d0-a061-c4286418dee3","shared_citers":31},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":29},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":28},{"title":"Program Synthesis with Large Language Models","work_id":"fd241a05-03b9-4de2-9588-9d77ce176125","shared_citers":28},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":27},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":26},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":21},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":20},{"title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","work_id":"ea9e51ce-1e75-4182-92d8-4d25f70d2ee4","shared_citers":16},{"title":"StarCoder 2 and The Stack v2: The Next Generation","work_id":"2495cc72-f326-4c23-8a39-d9a08cf583e4","shared_citers":14},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":14},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":13},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":12},{"title":"LLaMA: Open and Efficient Foundation Language 
Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":12},{"title":"Qwen2.5-Math Technical Report: Toward Mathematical Expert Model via Self-Improvement","work_id":"a097c5d4-6d32-46ee-9826-57d532bbfc9c","shared_citers":12},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":10},{"title":"Measuring Mathematical Problem Solving With the MATH Dataset","work_id":"50652ac6-fb7c-4675-a2c2-159c241feb17","shared_citers":10},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":9},{"title":"Qwen2 Technical Report","work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","shared_citers":9},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":8},{"title":"arXiv preprint arXiv:2207.10397 , year=","work_id":"1280b158-2c5d-444c-8584-ae187cbe83af","shared_citers":7}],"time_series":[{"n":1,"year":2024},{"n":6,"year":2025},{"n":113,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T01:22:28.695271+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T01:24:05.795257+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"Qwen2.5-Coder Technical Report","claims":[{"claim_text":"In this report, we introduce the Qwen2.5-Coder series, a significant upgrade from its predecessor, CodeQwen1.5. This series includes six models: Qwen2.5-Coder-(0.5B/1.5B/3B/7B/14B/32B). As a code-specific model, Qwen2.5-Coder is built upon the Qwen2.5 architecture and continues pretrained on a vast corpus of over 5.5 trillion tokens. Through meticulous data cleaning, scalable synthetic data generation, and balanced data mixing, Qwen2.5-Coder demonstrates impressive code generation capabilities while retaining general and math skills. These models have been evaluated on a wide range of code-rel","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Qwen2.5-Coder Technical Report because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T01:24:13.819520+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Qwen2.5-Coder Technical Report","claims":[{"claim_text":"In this report, we introduce the Qwen2.5-Coder series, a significant upgrade from its predecessor, CodeQwen1.5. This series includes six models: Qwen2.5-Coder-(0.5B/1.5B/3B/7B/14B/32B). As a code-specific model, Qwen2.5-Coder is built upon the Qwen2.5 architecture and continues pretrained on a vast corpus of over 5.5 trillion tokens. Through meticulous data cleaning, scalable synthetic data generation, and balanced data mixing, Qwen2.5-Coder demonstrates impressive code generation capabilities while retaining general and math skills. 
These models have been evaluated on a wide range of code-rel","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Qwen2.5-Coder Technical Report because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T01:24:13.804741+00:00"}},"summary":{"title":"Qwen2.5-Coder Technical Report","claims":[{"claim_text":"In this report, we introduce the Qwen2.5-Coder series, a significant upgrade from its predecessor, CodeQwen1.5. This series includes six models: Qwen2.5-Coder-(0.5B/1.5B/3B/7B/14B/32B). As a code-specific model, Qwen2.5-Coder is built upon the Qwen2.5 architecture and continues pretrained on a vast corpus of over 5.5 trillion tokens. Through meticulous data cleaning, scalable synthetic data generation, and balanced data mixing, Qwen2.5-Coder demonstrates impressive code generation capabilities while retaining general and math skills. These models have been evaluated on a wide range of code-rel","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Qwen2.5-Coder Technical Report because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":42},{"title":"Code Llama: Open Foundation Models for Code","work_id":"e73bffa4-7620-47ac-9327-259a60db52ca","shared_citers":33},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":32},{"title":"DeepSeek-Coder: When the Large Language Model Meets Programming -- The Rise of Code Intelligence","work_id":"f22dae5a-27e2-41d0-a061-c4286418dee3","shared_citers":31},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":29},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":28},{"title":"Program Synthesis with Large Language Models","work_id":"fd241a05-03b9-4de2-9588-9d77ce176125","shared_citers":28},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":27},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":26},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":21},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":20},{"title":"LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code","work_id":"ea9e51ce-1e75-4182-92d8-4d25f70d2ee4","shared_citers":16},{"title":"StarCoder 2 and The Stack v2: The Next Generation","work_id":"2495cc72-f326-4c23-8a39-d9a08cf583e4","shared_citers":14},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":14},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":13},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":12},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":12},{"title":"Qwen2.5-Math 
Technical Report: Toward Mathematical Expert Model via Self-Improvement","work_id":"a097c5d4-6d32-46ee-9826-57d532bbfc9c","shared_citers":12},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":10},{"title":"Measuring Mathematical Problem Solving With the MATH Dataset","work_id":"50652ac6-fb7c-4675-a2c2-159c241feb17","shared_citers":10},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":9},{"title":"Qwen2 Technical Report","work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","shared_citers":9},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":8},{"title":"arXiv preprint arXiv:2207.10397 , year=","work_id":"1280b158-2c5d-444c-8584-ae187cbe83af","shared_citers":7}],"time_series":[{"n":1,"year":2024},{"n":6,"year":2025},{"n":113,"year":2026}],"dependency_candidates":[]},"authors":[{"id":"785c6603-fbf9-4a35-bfb2-686be03040d6","orcid":null,"display_name":"Binyuan Hui","source":"manual","import_confidence":0.72},{"id":"1c6b3eab-22ec-4e62-823a-ee82c33ecb41","orcid":null,"display_name":"Dayiheng Liu","source":"manual","import_confidence":0.72},{"id":"28b4dae0-f5fc-4fdd-952e-8d170764a5ba","orcid":null,"display_name":"Jian Yang","source":"manual","import_confidence":0.72},{"id":"12c909f3-88dc-418c-b7ce-afbc5b61f374","orcid":null,"display_name":"Jiaxi Yang","source":"manual","import_confidence":0.72},{"id":"c354673e-74a5-4633-a59b-91d77cccd8aa","orcid":null,"display_name":"Lei Zhang","source":"manual","import_confidence":0.72},{"id":"2347b273-9b5e-459d-b285-0b0658cd9167","orcid":null,"display_name":"Zeyu Cui","source":"manual","import_confidence":0.72}]}}