{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:ZVRDHIXGLCIOGPHJO7AJREHR73","short_pith_number":"pith:ZVRDHIXG","schema_version":"1.0","canonical_sha256":"cd6233a2e65890e33ce977c09890f1fedc44d7c04e464990d4bf2dd58bec3d1b","source":{"kind":"arxiv","id":"2412.00131","version":1},"attestation_state":"computed","paper":{"title":"Open-Sora Plan: Open-Source Large Video Generation Model","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Bin Lin, Bin She, Bin Zhu, Cen Yan, Junwu Zhang, Lin Chen, Liuhan Chen, Li Yuan, Shaodong Wang, Shaoling Dong, Shenghai Yuan, Tanghui Jia, Xianyi He, Xiaoyi Dong, Xing Zhou, Xinhua Cheng, Yang Ye, Yatian Pang, Yonghong Tian, Yunyang Ge, Zhang Pan, Zhenyu Tang, Zhiheng Hu, Zongjian Li","submitted_at":"2024-11-28T14:07:45Z","abstract_excerpt":"We introduce Open-Sora Plan, an open-source project that aims to contribute a large generation model for generating desired high-resolution videos with long durations based on various user inputs. Our project comprises multiple components for the entire video generation process, including a Wavelet-Flow Variational Autoencoder, a Joint Image-Video Skiparse Denoiser, and various condition controllers. Moreover, many assistant strategies for efficient training and inference are designed, and a multi-dimensional data curation pipeline is proposed for obtaining desired high-quality data. Benefitin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2412.00131","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-11-28T14:07:45Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"586f8a7a064f033ed24aea2b9791446fe72495310837b02b86450be85ffaf296","abstract_canon_sha256":"32bf2e16d2e48930e8dba7dd521962ef4269afd2af3ceae8184b9743d1eae87b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:49.203130Z","signature_b64":"z6SXrHisJ5p6YteE6Tk0DGLjOWur54ActzUVcjaOQ0DDcnYpyK0gFuINR8L+SSPjcl5osoFmDLElSqxPBRv+AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cd6233a2e65890e33ce977c09890f1fedc44d7c04e464990d4bf2dd58bec3d1b","last_reissued_at":"2026-05-17T23:38:49.202609Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:49.202609Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Open-Sora Plan: Open-Source Large Video Generation Model","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Bin Lin, Bin She, Bin Zhu, Cen Yan, Junwu Zhang, Lin Chen, Liuhan Chen, Li Yuan, Shaodong Wang, Shaoling Dong, Shenghai Yuan, Tanghui Jia, Xianyi He, Xiaoyi Dong, Xing Zhou, Xinhua Cheng, Yang Ye, Yatian Pang, Yonghong Tian, Yunyang Ge, Zhang Pan, Zhenyu Tang, Zhiheng Hu, Zongjian Li","submitted_at":"2024-11-28T14:07:45Z","abstract_excerpt":"We introduce Open-Sora Plan, an open-source project that aims to contribute a large generation model for generating desired high-resolution videos with long durations based on various user inputs. Our project comprises multiple components for the entire video generation process, including a Wavelet-Flow Variational Autoencoder, a Joint Image-Video Skiparse Denoiser, and various condition controllers. Moreover, many assistant strategies for efficient training and inference are designed, and a multi-dimensional data curation pipeline is proposed for obtaining desired high-quality data. Benefitin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2412.00131","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2412.00131","created_at":"2026-05-17T23:38:49.202691+00:00"},{"alias_kind":"arxiv_version","alias_value":"2412.00131v1","created_at":"2026-05-17T23:38:49.202691+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2412.00131","created_at":"2026-05-17T23:38:49.202691+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZVRDHIXGLCIO","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZVRDHIXGLCIOGPHJ","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZVRDHIXG","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":37,"internal_anchor_count":18,"sample":[{"citing_arxiv_id":"2605.23345","citing_title":"SCOPE: Simulating Cross-game Operations in Playable Environments for FPS World Models","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2503.20314","citing_title":"Wan: Open and Advanced Large-Scale Video Generative Models","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2602.02214","citing_title":"Causal Forcing: Autoregressive Diffusion Distillation Done Right for High-Quality Real-Time Interactive Video Generation","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.22015","citing_title":"ORBIS: Output-Guided Token Reduction with Distribution-Aware Matching for Video Diffusion Acceleration","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2602.02214","citing_title":"Causal Forcing: Autoregressive Diffusion Distillation Done Right for High-Quality Real-Time Interactive Video Generation","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14382","citing_title":"Delta Forcing: Trust Region Steering for Interactive Autoregressive Video Generation","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19398","citing_title":"Rebalancing Reference Frame Dominance to Improve Motion in Image-to-Video Models","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14382","citing_title":"Delta Forcing: Trust Region Steering for Interactive Autoregressive Video Generation","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16649","citing_title":"AtlasVid: Efficient Ultra-High-Resolution Long Video Generation via Decoupled Global-Local Modeling","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16713","citing_title":"GeoWorld-VLM: Geometry from World Models for Vision-Language Models","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17248","citing_title":"Image-to-Video Diffusion: From Foundations to Open Frontiers","ref_index":65,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19398","citing_title":"Rebalancing Reference Frame Dominance to Improve Motion in Image-to-Video Models","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2506.00433","citing_title":"Latent Wavelet Diffusion For Ultra-High-Resolution Image Synthesis","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2502.10248","citing_title":"Step-Video-T2V Technical Report: The Practice, Challenges, and Future of Video Foundation Model","ref_index":69,"is_internal_anchor":true},{"citing_arxiv_id":"2506.19840","citing_title":"GenHSI: Controllable Generation of Human-Scene Interaction Videos","ref_index":54,"is_internal_anchor":true},{"citing_arxiv_id":"2508.13009","citing_title":"Matrix-game 2.0: An open-source real-time and streaming interactive world model","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2512.04678","citing_title":"Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2502.06764","citing_title":"History-Guided Video Diffusion","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2510.02283","citing_title":"Self-Forcing++: Towards Minute-Scale High-Quality Video Generation","ref_index":34,"is_internal_anchor":false},{"citing_arxiv_id":"2602.13669","citing_title":"EchoTorrent: Towards Swift, Sustained, and Streaming Multi-Modal Video Generation","ref_index":48,"is_internal_anchor":false},{"citing_arxiv_id":"2602.21581","citing_title":"MultiAnimate: Pose-Guided Image Animation Made Extensible","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2603.09721","citing_title":"FrameDiT: Diffusion Transformer with Matrix Attention for Efficient Video Generation","ref_index":27,"is_internal_anchor":false},{"citing_arxiv_id":"2603.18636","citing_title":"Attention Sparsity is Input-Stable: Training-Free Sparse Attention for Video Generation via Offline Sparsity Profiling and Online QK Co-Clustering","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2605.14382","citing_title":"Delta Forcing: Trust Region Steering for Interactive Autoregressive Video Generation","ref_index":7,"is_internal_anchor":false},{"citing_arxiv_id":"2605.12088","citing_title":"UniCustom: Unified Visual Conditioning for Multi-Reference Image Generation","ref_index":19,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73","json":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73.json","graph_json":"https://pith.science/api/pith-number/ZVRDHIXGLCIOGPHJO7AJREHR73/graph.json","events_json":"https://pith.science/api/pith-number/ZVRDHIXGLCIOGPHJO7AJREHR73/events.json","paper":"https://pith.science/paper/ZVRDHIXG"},"agent_actions":{"view_html":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73","download_json":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73.json","view_paper":"https://pith.science/paper/ZVRDHIXG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2412.00131&json=true","fetch_graph":"https://pith.science/api/pith-number/ZVRDHIXGLCIOGPHJO7AJREHR73/graph.json","fetch_events":"https://pith.science/api/pith-number/ZVRDHIXGLCIOGPHJO7AJREHR73/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73/action/storage_attestation","attest_author":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73/action/author_attestation","sign_citation":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73/action/citation_signature","submit_replication":"https://pith.science/pith/ZVRDHIXGLCIOGPHJO7AJREHR73/action/replication_record"}},"created_at":"2026-05-17T23:38:49.202691+00:00","updated_at":"2026-05-17T23:38:49.202691+00:00"}