{"work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","graph":{"co_cited":[{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":255},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":241},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":224},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":174},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":107},{"title":"OpenAI o1 System Card","work_id":"68d3c334-0fc9-49e3-b7b0-a69afae933e2","shared_citers":91},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":87},{"title":"Qwen2.5-VL Technical Report","work_id":"69dffacb-bfe8-442d-be86-48624c60426f","shared_citers":82},{"title":"Group Sequence Policy Optimization","work_id":"3a98b53b-9f52-4d95-adf7-89353c0a9a65","shared_citers":76},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":74},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":72},{"title":"Qwen2.5 Technical Report","work_id":"d8432992-4980-4a81-85c7-9fa2c2b87f85","shared_citers":72},{"title":"Qwen3-VL Technical Report","work_id":"1fe243aa-e3c0-4da6-b391-4cbcfc88d5c0","shared_citers":72},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":66},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":61},{"title":"Measuring Mathematical Problem Solving With the MATH Dataset","work_id":"50652ac6-fb7c-4675-a2c2-159c241feb17","shared_citers":61},{"title":"Understanding R1-Zero-Like Training: A Critical Perspective","work_id":"ec354f3b-9484-4a0c-94c8-92d4d0260835","shared_citers":61},{"title":"DeepSeek-V3 Technical Report","work_id":"57d2791d-2219-4c31-a077-afc04b12a75c","shared_citers":53},{"title":"HybridFlow: A Flexible and Efficient RLHF Framework","work_id":"7eb9c9f4-b322-4bba-8011-09ff8d6ad801","shared_citers":49},{"title":"Kimi k1.5: Scaling Reinforcement Learning with LLMs","work_id":"bff96ab1-bd6a-4585-be23-74fdb51969c7","shared_citers":46},{"title":"Does Reinforcement Learning Really Incentivize Reasoning Capacity in LLMs Beyond the Base Model?","work_id":"d854765a-e664-41c0-8655-21c4bf2e0cc4","shared_citers":41},{"title":"Search-R1: Training LLMs to Reason and Leverage Search Engines with Reinforcement Learning","work_id":"0e0b7549-2bc4-4574-aa7f-588ffa16eaae","shared_citers":39},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":37},{"title":"Tulu 3: Pushing Frontiers in Open Language Model Post-Training","work_id":"28c9dbea-056a-48c2-8000-85f809827e45","shared_citers":37}],"time_series":[{"n":5,"year":2024},{"n":27,"year":2025},{"n":595,"year":2026}]}}