{"work":{"id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","openalex_id":null,"doi":null,"arxiv_id":"2407.10671","raw_key":null,"title":"Qwen2 Technical Report","authors":null,"authors_text":"An Yang, Baosong Yang, Binyuan Hui, Bo Zheng, Bowen Yu, Chang Zhou","year":2024,"venue":"cs.CL","abstract":"This report introduces the Qwen2 series, the latest addition to our large language models and large multimodal models. We release a comprehensive suite of foundational and instruction-tuned language models, encompassing a parameter range from 0.5 to 72 billion, featuring dense models and a Mixture-of-Experts model. Qwen2 surpasses most prior open-weight models, including its predecessor Qwen1.5, and exhibits competitive performance relative to proprietary models across diverse benchmarks on language understanding, generation, multilingual proficiency, coding, mathematics, and reasoning.\n  The flagship model, Qwen2-72B, showcases remarkable performance: 84.2 on MMLU, 37.9 on GPQA, 64.6 on HumanEval, 89.5 on GSM8K, and 82.4 on BBH as a base language model. The instruction-tuned variant, Qwen2-72B-Instruct, attains 9.1 on MT-Bench, 48.1 on Arena-Hard, and 35.7 on LiveCodeBench. Moreover, Qwen2 demonstrates robust multilingual capabilities, proficient in approximately 30 languages, spanning English, Chinese, Spanish, French, German, Arabic, Russian, Korean, Japanese, Thai, Vietnamese, and more, underscoring its versatility and global reach.\n  To foster community innovation and accessibility, we have made the Qwen2 model weights openly available on Hugging Face and ModelScope, and the supplementary materials including example code on GitHub. These platforms also include resources for quantization, fine-tuning, and deployment, facilitating a wide range of applications and research endeavors.","external_url":"https://arxiv.org/abs/2407.10671","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-14T21:17:58.736071+00:00","pith_arxiv_id":"2407.10671","created_at":"2026-05-08T21:54:17.961159+00:00","updated_at":"2026-05-14T21:17:58.736071+00:00","title_quality_ok":false,"display_title":"Qwen2 Technical Report","render_title":"Qwen2 Technical Report"},"hub":{"state":{"work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","tier":"super_hub","tier_reason":"100+ Pith inbound or 10,000+ external citations","pith_inbound_count":158,"external_cited_by_count":null,"distinct_field_count":17,"first_pith_cited_at":"2023-03-31T17:28:46+00:00","last_pith_cited_at":"2026-05-13T16:12:33+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-14T21:56:14.409422+00:00","tier_text":"super_hub"},"tier":"super_hub","role_counts":[{"context_role":"background","n":2},{"context_role":"dataset","n":1},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":2},{"context_polarity":"use_dataset","n":1},{"context_polarity":"use_method","n":1}],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"Qwen2 Technical Report","claims":[{"claim_text":"This report introduces the Qwen2 series, the latest addition to our large language models and large multimodal models. We release a comprehensive suite of foundational and instruction-tuned language models, encompassing a parameter range from 0.5 to 72 billion, featuring dense models and a Mixture-of-Experts model. 
Qwen2 surpasses most prior open-weight models, including its predecessor Qwen1.5, and exhibits competitive performance relative to proprietary models across diverse benchmarks on language understanding, generation, multilingual proficiency, coding, mathematics, and reasoning.\n  The ","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Qwen2 Technical Report because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T22:13:39.069600+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[{"id":"f6e1a34c-959d-4c3d-b317-1a65d6fe682c","orcid":null,"display_name":"An Yang"},{"id":"3e0735af-daed-4e22-b48a-b7778e5e9a45","orcid":null,"display_name":"Baosong Yang"},{"id":"785c6603-fbf9-4a35-bfb2-686be03040d6","orcid":null,"display_name":"Binyuan Hui"},{"id":"cc48b5a8-a38d-4d8c-ace7-5add79c07fd0","orcid":null,"display_name":"Bo Zheng"},{"id":"cbf01de2-0d26-46f1-9d5a-d5752c92dae6","orcid":null,"display_name":"Bowen Yu"},{"id":"3e6ef8c8-5caa-4c37-a2d5-2f19a328f627","orcid":null,"display_name":"Chang Zhou"}]},"error":null,"updated_at":"2026-05-13T22:03:37.897919+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-13T22:03:37.406334+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":45},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":31},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":25},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":25},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":24},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":24},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":22},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":22},{"title":"Mistral 7B","work_id":"eb5e1305-ad11-4875-ad8d-ad8b8f697599","shared_citers":20},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":18},{"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","shared_citers":17},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":15},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":15},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":14},{"title":"Think you have Solved Question Answering? 
Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":13},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":12},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":11},{"title":"Gemma 2: Improving Open Language Models at a Practical Size","work_id":"4dd94e2f-2b27-4cbf-88a0-4910f0772a57","shared_citers":11},{"title":"Program Synthesis with Large Language Models","work_id":"fd241a05-03b9-4de2-9588-9d77ce176125","shared_citers":11},{"title":"Gemma: Open Models Based on Gemini Research and Technology","work_id":"a9ea2870-df28-40b8-a9e0-a7e9a116f793","shared_citers":10},{"title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach","work_id":"41fe12c4-e538-4890-a244-480650ed3078","shared_citers":10},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":9},{"title":"GPQA: A Graduate-Level Google-Proof Q&A Benchmark","work_id":"9e2a976b-f5ad-4aee-af5c-243fe0fe75d2","shared_citers":9},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":9}],"time_series":[{"n":1,"year":2023},{"n":6,"year":2024},{"n":6,"year":2025},{"n":135,"year":2026}]},"error":null,"updated_at":"2026-05-13T22:03:37.486714+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"fixed":1,"items":[{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-13T22:03:36.431704+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"Qwen2 Technical Report","claims":[{"claim_text":"This report introduces the Qwen2 series, the latest addition to our large language models and large multimodal models. We release a comprehensive suite of foundational and instruction-tuned language models, encompassing a parameter range from 0.5 to 72 billion, featuring dense models and a Mixture-of-Experts model. Qwen2 surpasses most prior open-weight models, including its predecessor Qwen1.5, and exhibits competitive performance relative to proprietary models across diverse benchmarks on language understanding, generation, multilingual proficiency, coding, mathematics, and reasoning.\n  The ","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Qwen2 Technical Report because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T22:03:38.847439+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Qwen2 Technical Report","claims":[{"claim_text":"This report introduces the Qwen2 series, the latest addition to our large language models and large multimodal models. We release a comprehensive suite of foundational and instruction-tuned language models, encompassing a parameter range from 0.5 to 72 billion, featuring dense models and a Mixture-of-Experts model. 
Qwen2 surpasses most prior open-weight models, including its predecessor Qwen1.5, and exhibits competitive performance relative to proprietary models across diverse benchmarks on language understanding, generation, multilingual proficiency, coding, mathematics, and reasoning.\n  The ","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Qwen2 Technical Report because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T22:03:38.850913+00:00"}},"summary":{"title":"Qwen2 Technical Report","claims":[{"claim_text":"This report introduces the Qwen2 series, the latest addition to our large language models and large multimodal models. We release a comprehensive suite of foundational and instruction-tuned language models, encompassing a parameter range from 0.5 to 72 billion, featuring dense models and a Mixture-of-Experts model. Qwen2 surpasses most prior open-weight models, including its predecessor Qwen1.5, and exhibits competitive performance relative to proprietary models across diverse benchmarks on language understanding, generation, multilingual proficiency, coding, mathematics, and reasoning.\n  The ","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Qwen2 Technical Report because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":45},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":31},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":25},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":25},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":24},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":24},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":22},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":22},{"title":"Mistral 7B","work_id":"eb5e1305-ad11-4875-ad8d-ad8b8f697599","shared_citers":20},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":18},{"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","shared_citers":17},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":15},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":15},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":14},{"title":"Think you have Solved Question Answering? 
Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":13},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":12},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":11},{"title":"Gemma 2: Improving Open Language Models at a Practical Size","work_id":"4dd94e2f-2b27-4cbf-88a0-4910f0772a57","shared_citers":11},{"title":"Program Synthesis with Large Language Models","work_id":"fd241a05-03b9-4de2-9588-9d77ce176125","shared_citers":11},{"title":"Gemma: Open Models Based on Gemini Research and Technology","work_id":"a9ea2870-df28-40b8-a9e0-a7e9a116f793","shared_citers":10},{"title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach","work_id":"41fe12c4-e538-4890-a244-480650ed3078","shared_citers":10},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":9},{"title":"GPQA: A Graduate-Level Google-Proof Q&A Benchmark","work_id":"9e2a976b-f5ad-4aee-af5c-243fe0fe75d2","shared_citers":9},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":9}],"time_series":[{"n":1,"year":2023},{"n":6,"year":2024},{"n":6,"year":2025},{"n":135,"year":2026}]},"authors":[{"id":"f6e1a34c-959d-4c3d-b317-1a65d6fe682c","orcid":null,"display_name":"An Yang","source":"manual","import_confidence":0.72},{"id":"3e0735af-daed-4e22-b48a-b7778e5e9a45","orcid":null,"display_name":"Baosong Yang","source":"manual","import_confidence":0.72},{"id":"785c6603-fbf9-4a35-bfb2-686be03040d6","orcid":null,"display_name":"Binyuan Hui","source":"manual","import_confidence":0.72},{"id":"cbf01de2-0d26-46f1-9d5a-d5752c92dae6","orcid":null,"display_name":"Bowen Yu","source":"manual","import_confidence":0.72},{"id":"cc48b5a8-a38d-4d8c-ace7-5add79c07fd0","orcid":null,"display_name":"Bo Zheng","source":"manual","import_confidence":0.72},{"id":"3e6ef8c8-5caa-4c37-a2d5-2f19a328f627","orcid":null,"display_name":"Chang Zhou","source":"manual","import_confidence":0.72}]}}
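The object above is a single serialized Pith hub record for the Qwen2 Technical Report. As a minimal sketch, assuming the payload has been saved as valid JSON in a file named hub_record.json (the filename and the script are illustrative, not part of the record), the main fields can be read like this:

```python
import json

# Minimal sketch: load the hub record, assuming it is stored as valid JSON
# in "hub_record.json" (the filename is an assumption, not part of the payload).
with open("hub_record.json", encoding="utf-8") as f:
    record = json.load(f)

work = record["work"]
hub = record["hub"]
state = hub["state"]

# Identity fields from the "work" object.
print(work["display_title"], work["year"], work["arxiv_id"])

# Hub tier and Pith inbound-citation count from the hub state.
print(state["tier"], state["pith_inbound_count"])

# Co-cited works, ordered by how many citing papers they share with this record.
co_cited = sorted(hub["graph"]["co_cited"],
                  key=lambda w: w["shared_citers"], reverse=True)
for w in co_cited[:5]:
    print(f'{w["shared_citers"]:>3}  {w["title"]}')

# Citation counts per year from the time series.
for point in sorted(hub["graph"]["time_series"], key=lambda p: p["year"]):
    print(point["year"], point["n"])
```

Note that the co_cited and time_series arrays appear twice in the record, once under runs.graph_features.result and once under the top-level graph key; the sketch reads the top-level copy.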