{"work":{"id":"a9ea2870-df28-40b8-a9e0-a7e9a116f793","openalex_id":null,"doi":null,"arxiv_id":"2403.08295","raw_key":null,"title":"Gemma: Open Models Based on Gemini Research and Technology","authors":null,"authors_text":"Gemma Team: Thomas Mesnard, Cassidy Hardin, Robert Dadashi, Surya Bhupatiraju, Shreya Pathak, Laurent Sifre","year":2024,"venue":"cs.CL","abstract":"This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. Gemma outperforms similarly sized open models on 11 out of 18 text-based tasks, and we present comprehensive evaluations of safety and responsibility aspects of the models, alongside a detailed description of model development. We believe the responsible release of LLMs is critical for improving the safety of frontier models, and for enabling the next wave of LLM innovations.","external_url":"https://arxiv.org/abs/2403.08295","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-14T22:03:02.355136+00:00","pith_arxiv_id":"2403.08295","created_at":"2026-05-09T06:00:38.052679+00:00","updated_at":"2026-05-14T22:03:02.355136+00:00","title_quality_ok":true,"display_title":"Gemma: Open Models Based on Gemini Research and Technology","render_title":"Gemma: Open Models Based on Gemini Research and Technology"},"hub":{"state":{"work_id":"a9ea2870-df28-40b8-a9e0-a7e9a116f793","tier":"super_hub","tier_reason":"100+ Pith inbound or 10,000+ external citations","pith_inbound_count":105,"external_cited_by_count":null,"distinct_field_count":14,"first_pith_cited_at":"2023-03-31T17:28:46+00:00","last_pith_cited_at":"2026-05-13T16:57:51+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-14T21:56:14.481030+00:00","tier_text":"super_hub"},"tier":"super_hub","role_counts":[{"context_role":"background","n":3},{"context_role":"dataset","n":1},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":3},{"context_polarity":"use_dataset","n":1},{"context_polarity":"use_method","n":1}],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"Gemma: Open Models Based on Gemini Research and Technology","claims":[{"claim_text":"This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. 
Gemma outperforms similarly sized open models on 11 out of 18 text-based tasks, and we present comprehensive evaluations of safety and responsibility aspects of the models, alongside a detailed descript","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Gemma: Open Models Based on Gemini Research and Technology because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T19:16:23.388210+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[{"id":"8144f054-03da-41cd-a70b-665987cbc97c","orcid":null,"display_name":"Gemma Team: Thomas Mesnard"},{"id":"7ac1c3f7-453d-4f58-96bc-86ed4727d11d","orcid":null,"display_name":"Cassidy Hardin"},{"id":"c0802647-4daa-4586-80f8-9048ff956388","orcid":null,"display_name":"Robert Dadashi"},{"id":"9bb70fa2-ecf3-4959-b786-94ce7ef2b6ca","orcid":null,"display_name":"Surya Bhupatiraju"},{"id":"b3928e5b-c66a-4e05-bbaf-ec0fd8a77930","orcid":null,"display_name":"Shreya Pathak"},{"id":"1a111a46-3678-4eb6-aa88-168ecef12078","orcid":null,"display_name":"Laurent Sifre"}]},"error":null,"updated_at":"2026-05-14T19:16:29.866695+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T06:06:48.442679+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":35},{"title":"Mistral 7B","work_id":"eb5e1305-ad11-4875-ad8d-ad8b8f697599","shared_citers":28},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":27},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":26},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":20},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":18},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":18},{"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","shared_citers":17},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":12},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":12},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":11},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":10},{"title":"Qwen2 Technical Report","work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","shared_citers":10},{"title":"Think you have Solved Question Answering? 
Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":9},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":8},{"title":"Gemma 2: Improving Open Language Models at a Practical Size","work_id":"4dd94e2f-2b27-4cbf-88a0-4910f0772a57","shared_citers":8},{"title":"LoRA: Low-Rank Adaptation of Large Language Models","work_id":"0426219a-789e-4964-adc8-a04538510818","shared_citers":8},{"title":"Mixtral of Experts","work_id":"0de8c352-9daa-4e1e-8c7b-3d0dec69f369","shared_citers":8},{"title":"Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone","work_id":"feef9556-a016-493c-abd2-0c97a23a7ebf","shared_citers":8},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":7},{"title":"HarmBench: A Standardized Evaluation Framework for Automated Red Teaming and Robust Refusal","work_id":"b0b0303f-2444-4789-a979-8153624312ff","shared_citers":7},{"title":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback","work_id":"a1f2574b-a899-4713-be60-c87ba332656c","shared_citers":7},{"title":"Universal and Transferable Adversarial Attacks on Aligned Language Models","work_id":"3322fa86-1768-4677-8425-dd326b45e078","shared_citers":7},{"title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","work_id":"ed240a10-5b19-406c-baa5-30803f465785","shared_citers":6}],"time_series":[{"n":2,"year":2023},{"n":10,"year":2024},{"n":3,"year":2025},{"n":83,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T06:17:17.093533+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T06:06:52.342776+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"Gemma: Open Models Based on Gemini Research and Technology","claims":[{"claim_text":"This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. Gemma outperforms similarly sized open models on 11 out of 18 text-based tasks, and we present comprehensive evaluations of safety and responsibility aspects of the models, alongside a detailed descript","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Gemma: Open Models Based on Gemini Research and Technology because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T19:16:29.870699+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Gemma: Open Models Based on Gemini Research and Technology","claims":[{"claim_text":"This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. 
Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. Gemma outperforms similarly sized open models on 11 out of 18 text-based tasks, and we present comprehensive evaluations of safety and responsibility aspects of the models, alongside a detailed descript","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Gemma: Open Models Based on Gemini Research and Technology because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T06:17:08.571007+00:00"}},"summary":{"title":"Gemma: Open Models Based on Gemini Research and Technology","claims":[{"claim_text":"This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. Gemma outperforms similarly sized open models on 11 out of 18 text-based tasks, and we present comprehensive evaluations of safety and responsibility aspects of the models, alongside a detailed descript","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Gemma: Open Models Based on Gemini Research and Technology because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":35},{"title":"Mistral 7B","work_id":"eb5e1305-ad11-4875-ad8d-ad8b8f697599","shared_citers":28},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":27},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":26},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":20},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":18},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":18},{"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","shared_citers":17},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":12},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":12},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":11},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":10},{"title":"Qwen2 Technical Report","work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","shared_citers":10},{"title":"Think you have Solved Question Answering? 
Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":9},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":8},{"title":"Gemma 2: Improving Open Language Models at a Practical Size","work_id":"4dd94e2f-2b27-4cbf-88a0-4910f0772a57","shared_citers":8},{"title":"LoRA: Low-Rank Adaptation of Large Language Models","work_id":"0426219a-789e-4964-adc8-a04538510818","shared_citers":8},{"title":"Mixtral of Experts","work_id":"0de8c352-9daa-4e1e-8c7b-3d0dec69f369","shared_citers":8},{"title":"Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone","work_id":"feef9556-a016-493c-abd2-0c97a23a7ebf","shared_citers":8},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":7},{"title":"HarmBench: A Standardized Evaluation Framework for Automated Red Teaming and Robust Refusal","work_id":"b0b0303f-2444-4789-a979-8153624312ff","shared_citers":7},{"title":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback","work_id":"a1f2574b-a899-4713-be60-c87ba332656c","shared_citers":7},{"title":"Universal and Transferable Adversarial Attacks on Aligned Language Models","work_id":"3322fa86-1768-4677-8425-dd326b45e078","shared_citers":7},{"title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","work_id":"ed240a10-5b19-406c-baa5-30803f465785","shared_citers":6}],"time_series":[{"n":2,"year":2023},{"n":10,"year":2024},{"n":3,"year":2025},{"n":83,"year":2026}],"dependency_candidates":[]},"authors":[{"id":"7ac1c3f7-453d-4f58-96bc-86ed4727d11d","orcid":null,"display_name":"Cassidy Hardin","source":"manual","import_confidence":0.72},{"id":"8144f054-03da-41cd-a70b-665987cbc97c","orcid":null,"display_name":"Gemma Team: Thomas Mesnard","source":"manual","import_confidence":0.72},{"id":"1a111a46-3678-4eb6-aa88-168ecef12078","orcid":null,"display_name":"Laurent Sifre","source":"manual","import_confidence":0.72},{"id":"c0802647-4daa-4586-80f8-9048ff956388","orcid":null,"display_name":"Robert Dadashi","source":"manual","import_confidence":0.72},{"id":"b3928e5b-c66a-4e05-bbaf-ec0fd8a77930","orcid":null,"display_name":"Shreya Pathak","source":"manual","import_confidence":0.72},{"id":"9bb70fa2-ecf3-4959-b786-94ce7ef2b6ca","orcid":null,"display_name":"Surya Bhupatiraju","source":"manual","import_confidence":0.72}]}}