{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:HRTAYKDCW2TVOJ7FZHHWNGUPFO","short_pith_number":"pith:HRTAYKDC","schema_version":"1.0","canonical_sha256":"3c660c2862b6a75727e5c9cf669a8f2b9e8b6c73bb9d5fa4177e969b0fe2ecdd","source":{"kind":"arxiv","id":"2606.31326","version":1},"attestation_state":"computed","paper":{"title":"Bridging Video Understanding and Generation in a Unified Framework","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Mingyu Guo, Renjie Chen, Runyi Li, Ruoyu Feng, Wenfeng Lin, Yuqi Wang","submitted_at":"2026-06-30T08:29:29Z","abstract_excerpt":"Recently, unified image generation and understanding have been extensively explored. However, extending such unified modeling paradigms to the video domain remains largely underexplored. A central challenge is that video understanding favors compact, discriminative semantic representations, whereas video generation requires dense signals that preserve visual details and temporal coherence. Videos naturally capture both spatial semantics and temporal dynamics, making them a more suitable modality for unified multimodal modeling compared to static images. In this paper, we propose Vega, a unifie"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.31326","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-30T08:29:29Z","cross_cats_sorted":[],"title_canon_sha256":"811409bbe1906436f87afe15b7a8348d2cbd8d1263ee5a1a1476f42f4d9864eb","abstract_canon_sha256":"36891eb2ca1f582effa95bb2a153ce6543a09ee5467c88982c07d56755deeb8f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-01T01:17:59.254678Z","signature_b64":"KBi7s8uuz6GHxGbf31uVDHtlnbD0cDA748lmLU9ysA75GhVmrboSZtDKRKK8HzgBCy9PsOJ3O4bC6441HLT0Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3c660c2862b6a75727e5c9cf669a8f2b9e8b6c73bb9d5fa4177e969b0fe2ecdd","last_reissued_at":"2026-07-01T01:17:59.254237Z","signature_status":"signed_v1","first_computed_at":"2026-07-01T01:17:59.254237Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Bridging Video Understanding and Generation in a Unified Framework","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Mingyu Guo, Renjie Chen, Runyi Li, Ruoyu Feng, Wenfeng Lin, Yuqi Wang","submitted_at":"2026-06-30T08:29:29Z","abstract_excerpt":"Recently, unified image generation and understanding have been extensively explored. However, extending such unified modeling paradigms to the video domain remains largely underexplored. A central challenge is that video understanding favors compact, discriminative semantic representations, whereas video generation requires dense signals that preserve visual details and temporal coherence. Videos naturally capture both spatial semantics and temporal dynamics, making them a more suitable modality for unified multimodal modeling compared to static images. In this paper, we propose Vega, a unifie"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.31326","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.31326/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.31326","created_at":"2026-07-01T01:17:59.254300+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.31326v1","created_at":"2026-07-01T01:17:59.254300+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.31326","created_at":"2026-07-01T01:17:59.254300+00:00"},{"alias_kind":"pith_short_12","alias_value":"HRTAYKDCW2TV","created_at":"2026-07-01T01:17:59.254300+00:00"},{"alias_kind":"pith_short_16","alias_value":"HRTAYKDCW2TVOJ7F","created_at":"2026-07-01T01:17:59.254300+00:00"},{"alias_kind":"pith_short_8","alias_value":"HRTAYKDC","created_at":"2026-07-01T01:17:59.254300+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO","json":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO.json","graph_json":"https://pith.science/api/pith-number/HRTAYKDCW2TVOJ7FZHHWNGUPFO/graph.json","events_json":"https://pith.science/api/pith-number/HRTAYKDCW2TVOJ7FZHHWNGUPFO/events.json","paper":"https://pith.science/paper/HRTAYKDC"},"agent_actions":{"view_html":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO","download_json":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO.json","view_paper":"https://pith.science/paper/HRTAYKDC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.31326&json=true","fetch_graph":"https://pith.science/api/pith-number/HRTAYKDCW2TVOJ7FZHHWNGUPFO/graph.json","fetch_events":"https://pith.science/api/pith-number/HRTAYKDCW2TVOJ7FZHHWNGUPFO/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO/action/storage_attestation","attest_author":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO/action/author_attestation","sign_citation":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO/action/citation_signature","submit_replication":"https://pith.science/pith/HRTAYKDCW2TVOJ7FZHHWNGUPFO/action/replication_record"}},"created_at":"2026-07-01T01:17:59.254300+00:00","updated_at":"2026-07-01T01:17:59.254300+00:00"}