{"work":{"id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","openalex_id":null,"doi":null,"arxiv_id":"2302.13971","raw_key":null,"title":"LLaMA: Open and Efficient Foundation Language Models","authors":null,"authors_text":"H","year":2023,"venue":"cs.CL","abstract":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.","external_url":"https://arxiv.org/abs/2302.13971","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-15T01:53:28.870134+00:00","pith_arxiv_id":"2302.13971","created_at":"2026-05-08T18:13:54.078791+00:00","updated_at":"2026-05-15T01:53:28.870134+00:00","title_quality_ok":true,"display_title":"LLaMA: Open and Efficient Foundation Language Models","render_title":"LLaMA: Open and Efficient Foundation Language Models"},"hub":{"state":{"work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","tier":"super_hub","tier_reason":"100+ Pith inbound or 10,000+ external citations","pith_inbound_count":481,"external_cited_by_count":null,"distinct_field_count":32,"first_pith_cited_at":"2023-03-28T17:59:12+00:00","last_pith_cited_at":"2026-05-14T09:00:56+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-15T01:46:18.564187+00:00","tier_text":"super_hub"},"tier":"super_hub","role_counts":[{"context_role":"background","n":15},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":13},{"context_polarity":"unclear","n":2},{"context_polarity":"use_method","n":1}],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"LLaMA: Open and Efficient Foundation Language Models","claims":[{"claim_text":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. 
We release all our models to the research community.","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks LLaMA: Open and Efficient Foundation Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T18:13:30.893278+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[]},"error":null,"updated_at":"2026-05-13T18:13:30.891244+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-13T18:13:30.887581+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":121},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":56},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":52},{"title":"Mistral 7B","work_id":"eb5e1305-ad11-4875-ad8d-ad8b8f697599","shared_citers":51},{"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","shared_citers":50},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":49},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":48},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":43},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":43},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":40},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":37},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":36},{"title":"Think you have Solved Question Answering? 
Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":35},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":29},{"title":"Training Compute-Optimal Large Language Models","work_id":"b2faf28d-86b7-429c-bc42-469458efc246","shared_citers":29},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":27},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":27},{"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","shared_citers":27},{"title":"OPT: Open Pre-trained Transformer Language Models","work_id":"d7ff3b21-1fff-4cf4-952a-4714e3ef2307","shared_citers":26},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":25},{"title":"Qwen2 Technical Report","work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","shared_citers":25},{"title":"A Survey of Large Language Models","work_id":"de1b42b5-4a0a-4b1f-8c78-1f7fe21be6c9","shared_citers":24},{"title":"GLU Variants Improve Transformer","work_id":"17d0763c-1016-41ab-a478-478e890765eb","shared_citers":23},{"title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism","work_id":"c888e6d1-0b1d-43d6-9ef5-f0912a0efa1b","shared_citers":23}],"time_series":[{"n":34,"year":2023},{"n":26,"year":2024},{"n":4,"year":2025},{"n":338,"year":2026}]},"error":null,"updated_at":"2026-05-13T17:25:55.795642+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"fixed":1,"items":[{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-13T18:13:30.286958+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"LLaMA: Open and Efficient Foundation Language Models","claims":[{"claim_text":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks LLaMA: Open and Efficient Foundation Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T18:13:30.889955+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"LLaMA: Open and Efficient Foundation Language Models","claims":[{"claim_text":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. 
We release all our models to the research community.","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks LLaMA: Open and Efficient Foundation Language Models because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-13T17:25:52.718711+00:00"}},"summary":{"title":"LLaMA: Open and Efficient Foundation Language Models","claims":[{"claim_text":"We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks LLaMA: Open and Efficient Foundation Language Models because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":121},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":56},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":52},{"title":"Mistral 7B","work_id":"eb5e1305-ad11-4875-ad8d-ad8b8f697599","shared_citers":51},{"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","shared_citers":50},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":49},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":48},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":43},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":43},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":40},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":37},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":36},{"title":"Think you have Solved Question Answering? 
Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":35},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":29},{"title":"Training Compute-Optimal Large Language Models","work_id":"b2faf28d-86b7-429c-bc42-469458efc246","shared_citers":29},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":27},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":27},{"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","shared_citers":27},{"title":"OPT: Open Pre-trained Transformer Language Models","work_id":"d7ff3b21-1fff-4cf4-952a-4714e3ef2307","shared_citers":26},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":25},{"title":"Qwen2 Technical Report","work_id":"a1857881-ab9b-4b80-9b5f-9ae4b5c2566d","shared_citers":25},{"title":"A Survey of Large Language Models","work_id":"de1b42b5-4a0a-4b1f-8c78-1f7fe21be6c9","shared_citers":24},{"title":"GLU Variants Improve Transformer","work_id":"17d0763c-1016-41ab-a478-478e890765eb","shared_citers":23},{"title":"Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism","work_id":"c888e6d1-0b1d-43d6-9ef5-f0912a0efa1b","shared_citers":23}],"time_series":[{"n":34,"year":2023},{"n":26,"year":2024},{"n":4,"year":2025},{"n":338,"year":2026}]},"authors":[]}}