{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2DAPG4A5SDR2AUVQ2LOON3PCMF","short_pith_number":"pith:2DAPG4A5","schema_version":"1.0","canonical_sha256":"d0c0f3701d90e3a052b0d2dce6ede2616ee4c72fbc29738dbe335df39cbbe085","source":{"kind":"arxiv","id":"2605.17312","version":1},"attestation_state":"computed","paper":{"title":"VISTA: Triplet-Supervised Video Style Transfer with Diffusion Transformers","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Haofan Wang, Mike Zheng Shou, Wangzi Yao, Yiren Song","submitted_at":"2026-05-17T08:03:53Z","abstract_excerpt":"Video style transfer aims to render videos in a target artistic style while preserving content, structure, and motion. While image stylization has advanced rapidly, video stylization remains challenging due to temporal inconsistency. Most existing methods stylize frames or keyframes and enforce consistency via heuristic temporal propagation, which is brittle under occlusions, disocclusions, and long-term motion, leading to drift and flickering artifacts. We argue that a fundamental bottleneck lies in the lack of large-scale triplet data and a principled training paradigm that jointly models an"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.17312","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-17T08:03:53Z","cross_cats_sorted":[],"title_canon_sha256":"081e8ba4c66271238d93fc2ede9ffd03f8f9bc23ca41d0cc5593b01533449423","abstract_canon_sha256":"d94d908c8ba3addf087dea885e938f430cf2828aca384760c56ee4a602fc4c89"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:51.602248Z","signature_b64":"SgqQl5JeulJ4APe5/2YzPXVFv6KnGQHn/SCXOWDtYtyuwzbsalq7z+NTxqU4CoTeiMaKKohhv+WHjtkyfO2oDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d0c0f3701d90e3a052b0d2dce6ede2616ee4c72fbc29738dbe335df39cbbe085","last_reissued_at":"2026-05-20T00:03:51.601379Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:51.601379Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"VISTA: Triplet-Supervised Video Style Transfer with Diffusion Transformers","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Haofan Wang, Mike Zheng Shou, Wangzi Yao, Yiren Song","submitted_at":"2026-05-17T08:03:53Z","abstract_excerpt":"Video style transfer aims to render videos in a target artistic style while preserving content, structure, and motion. While image stylization has advanced rapidly, video stylization remains challenging due to temporal inconsistency. Most existing methods stylize frames or keyframes and enforce consistency via heuristic temporal propagation, which is brittle under occlusions, disocclusions, and long-term motion, leading to drift and flickering artifacts. We argue that a fundamental bottleneck lies in the lack of large-scale triplet data and a principled training paradigm that jointly models an"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17312","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.17312/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T22:01:57.787281Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.753835Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"11b97c4132be5e30651a1846b9b909bdfb6dd17537b5654b4dfa8f593ff9132f"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.17312","created_at":"2026-05-20T00:03:51.601528+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.17312v1","created_at":"2026-05-20T00:03:51.601528+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17312","created_at":"2026-05-20T00:03:51.601528+00:00"},{"alias_kind":"pith_short_12","alias_value":"2DAPG4A5SDR2","created_at":"2026-05-20T00:03:51.601528+00:00"},{"alias_kind":"pith_short_16","alias_value":"2DAPG4A5SDR2AUVQ","created_at":"2026-05-20T00:03:51.601528+00:00"},{"alias_kind":"pith_short_8","alias_value":"2DAPG4A5","created_at":"2026-05-20T00:03:51.601528+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF","json":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF.json","graph_json":"https://pith.science/api/pith-number/2DAPG4A5SDR2AUVQ2LOON3PCMF/graph.json","events_json":"https://pith.science/api/pith-number/2DAPG4A5SDR2AUVQ2LOON3PCMF/events.json","paper":"https://pith.science/paper/2DAPG4A5"},"agent_actions":{"view_html":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF","download_json":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF.json","view_paper":"https://pith.science/paper/2DAPG4A5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.17312&json=true","fetch_graph":"https://pith.science/api/pith-number/2DAPG4A5SDR2AUVQ2LOON3PCMF/graph.json","fetch_events":"https://pith.science/api/pith-number/2DAPG4A5SDR2AUVQ2LOON3PCMF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF/action/storage_attestation","attest_author":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF/action/author_attestation","sign_citation":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF/action/citation_signature","submit_replication":"https://pith.science/pith/2DAPG4A5SDR2AUVQ2LOON3PCMF/action/replication_record"}},"created_at":"2026-05-20T00:03:51.601528+00:00","updated_at":"2026-05-20T00:03:51.601528+00:00"}