{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:67J25CW3NUDUNKUHZOSGZGYTCZ","short_pith_number":"pith:67J25CW3","schema_version":"1.0","canonical_sha256":"f7d3ae8adb6d0746aa87cba46c9b1316737aedc0c6d04ebb9fa9f01772efa43c","source":{"kind":"arxiv","id":"2403.07711","version":5},"attestation_state":"computed","paper":{"title":"SSM Meets Video Diffusion Models: Efficient Long-Term Video Generation with Structured State Spaces","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Masahiro Suzuki, Shohei Taniguchi, Yutaka Matsuo, Yuta Oshima","submitted_at":"2024-03-12T14:53:56Z","abstract_excerpt":"Given the remarkable achievements in image generation through diffusion models, the research community has shown increasing interest in extending these models to video generation. Recent diffusion models for video generation have predominantly utilized attention layers to extract temporal features. However, attention layers are limited by their computational costs, which increase quadratically with the sequence length. This limitation presents significant challenges when generating longer video sequences using diffusion models. To overcome this challenge, we propose leveraging state-space mode"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2403.07711","kind":"arxiv","version":5},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-03-12T14:53:56Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d828eba2408ad3166cf6b8d423d965da04d60a99fdbc653434ade38f83b0e5ca","abstract_canon_sha256":"60a48803635a222a54714df5f111cfbd5957c8f9488472ce1d3a80616974c42c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T01:17:19.240048Z","signature_b64":"a1/ogurgvGP3HsMMih5PVFa5vL49UPeUnnB0tBnfQv+IetvGLdi36UCP+rSNdmprQPqVFmk1fB2eb3Kl0BifCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f7d3ae8adb6d0746aa87cba46c9b1316737aedc0c6d04ebb9fa9f01772efa43c","last_reissued_at":"2026-06-30T01:17:19.239088Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T01:17:19.239088Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SSM Meets Video Diffusion Models: Efficient Long-Term Video Generation with Structured State Spaces","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Masahiro Suzuki, Shohei Taniguchi, Yutaka Matsuo, Yuta Oshima","submitted_at":"2024-03-12T14:53:56Z","abstract_excerpt":"Given the remarkable achievements in image generation through diffusion models, the research community has shown increasing interest in extending these models to video generation. Recent diffusion models for video generation have predominantly utilized attention layers to extract temporal features. However, attention layers are limited by their computational costs, which increase quadratically with the sequence length. This limitation presents significant challenges when generating longer video sequences using diffusion models. To overcome this challenge, we propose leveraging state-space mode"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2403.07711","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2403.07711/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2403.07711","created_at":"2026-06-30T01:17:19.239244+00:00"},{"alias_kind":"arxiv_version","alias_value":"2403.07711v5","created_at":"2026-06-30T01:17:19.239244+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2403.07711","created_at":"2026-06-30T01:17:19.239244+00:00"},{"alias_kind":"pith_short_12","alias_value":"67J25CW3NUDU","created_at":"2026-06-30T01:17:19.239244+00:00"},{"alias_kind":"pith_short_16","alias_value":"67J25CW3NUDUNKUH","created_at":"2026-06-30T01:17:19.239244+00:00"},{"alias_kind":"pith_short_8","alias_value":"67J25CW3","created_at":"2026-06-30T01:17:19.239244+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2606.27677","citing_title":"DIM-WAM: World-Action Modeling with Diverse Historical Event Memory","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2408.01129","citing_title":"A Survey of Mamba","ref_index":141,"is_internal_anchor":true},{"citing_arxiv_id":"2504.02792","citing_title":"Unified World Models: Coupling Video and Action Diffusion for Pretraining on Large Robotic Datasets","ref_index":33,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ","json":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ.json","graph_json":"https://pith.science/api/pith-number/67J25CW3NUDUNKUHZOSGZGYTCZ/graph.json","events_json":"https://pith.science/api/pith-number/67J25CW3NUDUNKUHZOSGZGYTCZ/events.json","paper":"https://pith.science/paper/67J25CW3"},"agent_actions":{"view_html":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ","download_json":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ.json","view_paper":"https://pith.science/paper/67J25CW3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2403.07711&json=true","fetch_graph":"https://pith.science/api/pith-number/67J25CW3NUDUNKUHZOSGZGYTCZ/graph.json","fetch_events":"https://pith.science/api/pith-number/67J25CW3NUDUNKUHZOSGZGYTCZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ/action/storage_attestation","attest_author":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ/action/author_attestation","sign_citation":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ/action/citation_signature","submit_replication":"https://pith.science/pith/67J25CW3NUDUNKUHZOSGZGYTCZ/action/replication_record"}},"created_at":"2026-06-30T01:17:19.239244+00:00","updated_at":"2026-06-30T01:17:19.239244+00:00"}