{"work_id":"69dffacb-bfe8-442d-be86-48624c60426f","graph":{"co_cited":[{"title":"Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution","work_id":"8abcfe4f-e0fb-44b7-9123-448fac95f90a","shared_citers":112},{"title":"Qwen3-VL Technical Report","work_id":"1fe243aa-e3c0-4da6-b391-4cbcfc88d5c0","shared_citers":100},{"title":"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models","work_id":"c5006563-f3ec-438a-9e35-b7b484f34828","shared_citers":82},{"title":"InternVL3: Exploring Advanced Training and Test-Time Recipes for Open-Source Multimodal Models","work_id":"fe8637aa-12bc-4434-8d36-9f57b5eebcbe","shared_citers":80},{"title":"GPT-4o System Card","work_id":"f37bf1c7-4964-4e56-9762-d20da8d9009f","shared_citers":79},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":78},{"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","shared_citers":72},{"title":"LLaVA-OneVision: Easy Visual Task Transfer","work_id":"f5f2452b-f2a9-49ac-b38d-c76e18cdfe49","shared_citers":70},{"title":"Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities","work_id":"008df105-2fdd-45d8-857a-8e35868aecb6","shared_citers":64},{"title":"InternVL3.5: Advancing Open-Source Multimodal Models in Versatility, Reasoning, and Efficiency","work_id":"b8f5e260-fff5-444e-bcf5-2c42cfefd83d","shared_citers":61},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":61},{"title":"Expanding Performance Boundaries of Open-Source Multimodal Models with Model, Data, and Test-Time Scaling","work_id":"ee70bdc8-4656-4849-ada7-ce42a2278d70","shared_citers":57},{"title":"Qwen-VL: A Versatile Vision-Language Model for Understanding, Localization, Text Reading, and Beyond","work_id":"cbc2bb21-b6bb-46c0-80bf-107e195ffe10","shared_citers":42},{"title":"Gemini: A Family of Highly Capable Multimodal Models","work_id":"83f7c85b-3f11-450f-ac0c-64d9745220b2","shared_citers":38},{"title":"Proximal Policy Optimization Algorithms","work_id":"240c67fe-d14d-4520-91c1-38a4e272ca19","shared_citers":32},{"title":"The Llama 3 Herd of Models","work_id":"1549a635-88af-4ac1-acfe-51ae7bb53345","shared_citers":32},{"title":"Emerging Properties in Unified Multimodal Pretraining","work_id":"e0cfd82c-f5d4-44fd-b531-ec73ab0a805b","shared_citers":30},{"title":"OpenAI o1 System Card","work_id":"68d3c334-0fc9-49e3-b7b0-a69afae933e2","shared_citers":29},{"title":"Gemma 3 Technical Report","work_id":"f93e08bf-9e96-409b-8ac6-b8385fd17fd7","shared_citers":28},{"title":"LLaVA-Video: Video Instruction Tuning With Synthetic Data","work_id":"e598f516-d992-449a-ab6d-6c788b3a1d7b","shared_citers":28},{"title":"DAPO: An Open-Source LLM Reinforcement Learning System at Scale","work_id":"64019d00-0b11-4bbd-b173-b46c8fad0157","shared_citers":27},{"title":"Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context","work_id":"80e3e977-f1bb-4c83-8d0c-1ab0a0c5c3f1","shared_citers":27},{"title":"MiniCPM-V: A GPT-4V Level MLLM on Your Phone","work_id":"0f06e436-0c76-4e3c-be5e-6168f6bc4336","shared_citers":26},{"title":"SigLIP 2: Multilingual Vision-Language Encoders with Improved Semantic Understanding, Localization, and Dense Features","work_id":"50eec732-2d41-432f-9dcf-ac7fff235ea5","shared_citers":26}],"time_series":[{"n":25,"year":2025},{"n":387,"year":2026}]}}