{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:RRNQ26LZWORYBNUWD2RPDS7Y3D","short_pith_number":"pith:RRNQ26LZ","schema_version":"1.0","canonical_sha256":"8c5b0d7979b3a380b6961ea2f1cbf8d8f01c91ca03a08b58fce7e3674b58ac62","source":{"kind":"arxiv","id":"1706.08033","version":2},"attestation_state":"computed","paper":{"title":"Decomposing Motion and Content for Natural Video Sequence Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Honglak Lee, Jimei Yang, Ruben Villegas, Seunghoon Hong, Xunyu Lin","submitted_at":"2017-06-25T04:18:12Z","abstract_excerpt":"We propose a deep neural network for the prediction of future frames in natural video sequences. To effectively handle complex evolution of pixels in videos, we propose to decompose the motion and content, two key components generating dynamics in videos. Our model is built upon the Encoder-Decoder Convolutional Neural Network and Convolutional LSTM for pixel-level prediction, which independently capture the spatial layout of an image and the corresponding temporal dynamics. By independently modeling motion and content, predicting the next frame reduces to converting the extracted content feat"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1706.08033","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-06-25T04:18:12Z","cross_cats_sorted":[],"title_canon_sha256":"d59a4c854e40025c48c45c46160b38bb00d4c71c6563eccdaf6f6706080c9c60","abstract_canon_sha256":"2ce023bd6860cce25985b6e51389aef4ea34ea7f0d18833f1143f4320dadab42"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:26:36.884582Z","signature_b64":"6VXXrE8HYw7h/RkMp6PhiIQmgjL6ekxY+uye9zRrC7AJsN+g0brM4uIEF1ZobQtJ4ymZZti2WsAJoQGI/mHjDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8c5b0d7979b3a380b6961ea2f1cbf8d8f01c91ca03a08b58fce7e3674b58ac62","last_reissued_at":"2026-05-18T00:26:36.883964Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:26:36.883964Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Decomposing Motion and Content for Natural Video Sequence Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Honglak Lee, Jimei Yang, Ruben Villegas, Seunghoon Hong, Xunyu Lin","submitted_at":"2017-06-25T04:18:12Z","abstract_excerpt":"We propose a deep neural network for the prediction of future frames in natural video sequences. To effectively handle complex evolution of pixels in videos, we propose to decompose the motion and content, two key components generating dynamics in videos. Our model is built upon the Encoder-Decoder Convolutional Neural Network and Convolutional LSTM for pixel-level prediction, which independently capture the spatial layout of an image and the corresponding temporal dynamics. By independently modeling motion and content, predicting the next frame reduces to converting the extracted content feat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1706.08033","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1706.08033","created_at":"2026-05-18T00:26:36.884068+00:00"},{"alias_kind":"arxiv_version","alias_value":"1706.08033v2","created_at":"2026-05-18T00:26:36.884068+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1706.08033","created_at":"2026-05-18T00:26:36.884068+00:00"},{"alias_kind":"pith_short_12","alias_value":"RRNQ26LZWORY","created_at":"2026-05-18T12:31:39.905425+00:00"},{"alias_kind":"pith_short_16","alias_value":"RRNQ26LZWORYBNUW","created_at":"2026-05-18T12:31:39.905425+00:00"},{"alias_kind":"pith_short_8","alias_value":"RRNQ26LZ","created_at":"2026-05-18T12:31:39.905425+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"1906.10182","citing_title":"Planning Robot Motion using Deep Visual Prediction","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"1907.08845","citing_title":"Order Matters: Shuffling Sequence Generation for Video Prediction","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2304.11193","citing_title":"Multi-Modal World Model for Physical Robot Interactions: Simultaneous Visual and Tactile Predictions for Enhanced Accuracy","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2410.05882","citing_title":"Frame forecasting in cine MRI using the PCA respiratory motion model: comparing recurrent neural networks trained online and transformers","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2512.23421","citing_title":"DriveLaW:Unifying Planning and Video Generation in a Latent Driving World","ref_index":64,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12090","citing_title":"World Action Models: The Next Frontier in Embodied AI","ref_index":284,"is_internal_anchor":false},{"citing_arxiv_id":"2604.06339","citing_title":"Evolution of Video Generative Foundations","ref_index":20,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D","json":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D.json","graph_json":"https://pith.science/api/pith-number/RRNQ26LZWORYBNUWD2RPDS7Y3D/graph.json","events_json":"https://pith.science/api/pith-number/RRNQ26LZWORYBNUWD2RPDS7Y3D/events.json","paper":"https://pith.science/paper/RRNQ26LZ"},"agent_actions":{"view_html":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D","download_json":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D.json","view_paper":"https://pith.science/paper/RRNQ26LZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1706.08033&json=true","fetch_graph":"https://pith.science/api/pith-number/RRNQ26LZWORYBNUWD2RPDS7Y3D/graph.json","fetch_events":"https://pith.science/api/pith-number/RRNQ26LZWORYBNUWD2RPDS7Y3D/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D/action/storage_attestation","attest_author":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D/action/author_attestation","sign_citation":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D/action/citation_signature","submit_replication":"https://pith.science/pith/RRNQ26LZWORYBNUWD2RPDS7Y3D/action/replication_record"}},"created_at":"2026-05-18T00:26:36.884068+00:00","updated_at":"2026-05-18T00:26:36.884068+00:00"}