{"work_id":"1910796d-9b52-4683-bf5c-de9632c1028b","graph":{"co_cited":[{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":61},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":36},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":31},{"title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","work_id":"e96730e3-129b-4db6-b981-15ab7932e297","shared_citers":30},{"title":"Auto-Encoding Variational Bayes","work_id":"97d95295-30e1-42b4-bbf6-85f0fa4edb44","shared_citers":29},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":25},{"title":"Gaussian Error Linear Units (GELUs)","work_id":"0466fd22-03a1-4a61-af0a-a900e77bb023","shared_citers":24},{"title":"Layer Normalization","work_id":"20a2d720-0046-4c7c-bcd6-327ec8143f69","shared_citers":24},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":22},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":21},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":21},{"title":"DINOv2: Learning Robust Visual Features without Supervision","work_id":"26b304e5-b54a-4f26-be7e-83299eca52e4","shared_citers":20},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":20},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":20},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":19},{"title":"Very Deep Convolutional Networks for Large-Scale Image Recognition","work_id":"1c4b4409-c14b-488b-a086-c57a5aab8a29","shared_citers":18},{"title":"Attention Is All You Need","work_id":"baafb5a2-5272-43bc-932f-09fa9ffe5316","shared_citers":17},{"title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","work_id":"ed240a10-5b19-406c-baa5-30803f465785","shared_citers":17},{"title":"Flow Matching for Generative Modeling","work_id":"6edb71c4-5d64-40af-a394-9757ea051a36","shared_citers":17},{"title":"Language Models are Few-Shot Learners","work_id":"214732c0-2edd-44a0-af9e-28184a2b8279","shared_citers":16},{"title":"Score-Based Generative Modeling through Stochastic Differential Equations","work_id":"d9110e53-a5d4-4794-a4c5-a575e91c31ad","shared_citers":16},{"title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","work_id":"68a5177f-d644-44c1-bd4f-4e5278c22f5d","shared_citers":15},{"title":"PennyLane: Automatic differentiation of hybrid quantum-classical computations","work_id":"83078d0b-6c02-4fc5-822d-4da4204fd057","shared_citers":15},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":14}],"time_series":[{"n":2,"year":2015},{"n":3,"year":2016},{"n":3,"year":2017},{"n":4,"year":2018},{"n":5,"year":2019},{"n":3,"year":2020},{"n":7,"year":2021},{"n":7,"year":2022},{"n":6,"year":2023},{"n":5,"year":2024},{"n":7,"year":2025},{"n":585,"year":2026}]}}