{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:MGLSDXN2TCI63N6EII7RH2UKGO","short_pith_number":"pith:MGLSDXN2","schema_version":"1.0","canonical_sha256":"619721ddba9891edb7c4423f13ea8a33945ce29df2b0b293105ff5ea328d058a","source":{"kind":"arxiv","id":"2606.08415","version":1},"attestation_state":"computed","paper":{"title":"CoVEBench: Can Video Editing Models Handle Complex Instructions?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Dunyuan Liu, Jiaheng Liu, Jialu Chen, Jiaming Wang, Jiangtao Wu, Shihao Li, Xuedong Zhao, Yiwen He, Yuanxing Zhang, Zekun Moore Wang","submitted_at":"2026-06-07T02:29:41Z","abstract_excerpt":"While recent text-guided video editing models excel at elementary tasks (e.g., style transfer, object insertion), real-world user requests are highly compositional. A single prompt often demands multiple coupled edits, such as modifying subjects, actions, and camera views, while strictly preserving unrelated spatiotemporal content. Existing benchmarks, heavily constrained by isolated edits and coarse global metrics, fail to diagnose how models handle such complex workflows. To address this gap, we introduce CoVEBench, a compositional video editing benchmark comprising 416 curated source videos"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.08415","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-07T02:29:41Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a57b94182fe5408b1052a4aec7fe2c38728f811f31a11f3259db7630d6d3c639","abstract_canon_sha256":"e6b40cdd6065cc8902c7fc66cd2086b06cd21bd4c56602c4d1f1c424e59717df"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:05:36.095280Z","signature_b64":"AvUkBwFaZIivmYuObPUF0zHAK+cdWEUly3Fr6kH2vXNO4ywJqtn4M5IXd2pN3x+WkcsJ0SZRk0IFyqyoYw3FCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"619721ddba9891edb7c4423f13ea8a33945ce29df2b0b293105ff5ea328d058a","last_reissued_at":"2026-06-09T01:05:36.094864Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:05:36.094864Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CoVEBench: Can Video Editing Models Handle Complex Instructions?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Dunyuan Liu, Jiaheng Liu, Jialu Chen, Jiaming Wang, Jiangtao Wu, Shihao Li, Xuedong Zhao, Yiwen He, Yuanxing Zhang, Zekun Moore Wang","submitted_at":"2026-06-07T02:29:41Z","abstract_excerpt":"While recent text-guided video editing models excel at elementary tasks (e.g., style transfer, object insertion), real-world user requests are highly compositional. A single prompt often demands multiple coupled edits, such as modifying subjects, actions, and camera views, while strictly preserving unrelated spatiotemporal content. Existing benchmarks, heavily constrained by isolated edits and coarse global metrics, fail to diagnose how models handle such complex workflows. To address this gap, we introduce CoVEBench, a compositional video editing benchmark comprising 416 curated source videos"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08415","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.08415/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.08415","created_at":"2026-06-09T01:05:36.094925+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.08415v1","created_at":"2026-06-09T01:05:36.094925+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08415","created_at":"2026-06-09T01:05:36.094925+00:00"},{"alias_kind":"pith_short_12","alias_value":"MGLSDXN2TCI6","created_at":"2026-06-09T01:05:36.094925+00:00"},{"alias_kind":"pith_short_16","alias_value":"MGLSDXN2TCI63N6E","created_at":"2026-06-09T01:05:36.094925+00:00"},{"alias_kind":"pith_short_8","alias_value":"MGLSDXN2","created_at":"2026-06-09T01:05:36.094925+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO","json":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO.json","graph_json":"https://pith.science/api/pith-number/MGLSDXN2TCI63N6EII7RH2UKGO/graph.json","events_json":"https://pith.science/api/pith-number/MGLSDXN2TCI63N6EII7RH2UKGO/events.json","paper":"https://pith.science/paper/MGLSDXN2"},"agent_actions":{"view_html":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO","download_json":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO.json","view_paper":"https://pith.science/paper/MGLSDXN2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.08415&json=true","fetch_graph":"https://pith.science/api/pith-number/MGLSDXN2TCI63N6EII7RH2UKGO/graph.json","fetch_events":"https://pith.science/api/pith-number/MGLSDXN2TCI63N6EII7RH2UKGO/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO/action/storage_attestation","attest_author":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO/action/author_attestation","sign_citation":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO/action/citation_signature","submit_replication":"https://pith.science/pith/MGLSDXN2TCI63N6EII7RH2UKGO/action/replication_record"}},"created_at":"2026-06-09T01:05:36.094925+00:00","updated_at":"2026-06-09T01:05:36.094925+00:00"}