{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:KOT6CL3ANMEURBU4IDC2OKRGEH","short_pith_number":"pith:KOT6CL3A","schema_version":"1.0","canonical_sha256":"53a7e12f606b0948869c40c5a72a2621c1ed741ee3d789ad0e10d5802d9cae91","source":{"kind":"arxiv","id":"2602.00443","version":2},"attestation_state":"computed","paper":{"title":"RVCBench: Benchmarking the Robustness of Voice Cloning Across Modern Audio Generation Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.MM","eess.AS"],"primary_cat":"cs.SD","authors_text":"Deval Pandya, Hanlin Yu, Ruinan Jin, Xiaoxiao Li, Xinting Liao","submitted_at":"2026-01-31T01:38:55Z","abstract_excerpt":"Modern voice cloning, also known as zero-shot text-to-speech (TTS), can synthesize speech that closely matches a target speaker from only seconds of reference audio, enabling applications such as personalized speech interfaces and dubbing. In practice, these systems often face noisy reference audio, imperfect text prompts, multilingual and long-form generation, post-processing, and adversarial perturbations, all of which can weaken robustness. Despite rapid progress in codec-token language models and diffusion-based TTS, robustness under realistic deployment shifts remains underexplored. This "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.00443","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-01-31T01:38:55Z","cross_cats_sorted":["cs.MM","eess.AS"],"title_canon_sha256":"815ede98a0e19829d189cc7cd3d26a5ca8a5bee85627099d72bfe124146cd118","abstract_canon_sha256":"5760227cc5b784a5dca211356dda4aed3848aea374af6823f5f9501432440a9e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:04:04.047861Z","signature_b64":"QK7GR8q5uOFbPeh/w6Qxexo1O9WCH90J7cNpfAMh9ppJnBMbgkuIH7Unu8+SZxfqcHRcoB3fEVOv3/Pbzb+BDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"53a7e12f606b0948869c40c5a72a2621c1ed741ee3d789ad0e10d5802d9cae91","last_reissued_at":"2026-05-26T02:04:04.046981Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:04:04.046981Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RVCBench: Benchmarking the Robustness of Voice Cloning Across Modern Audio Generation Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.MM","eess.AS"],"primary_cat":"cs.SD","authors_text":"Deval Pandya, Hanlin Yu, Ruinan Jin, Xiaoxiao Li, Xinting Liao","submitted_at":"2026-01-31T01:38:55Z","abstract_excerpt":"Modern voice cloning, also known as zero-shot text-to-speech (TTS), can synthesize speech that closely matches a target speaker from only seconds of reference audio, enabling applications such as personalized speech interfaces and dubbing. In practice, these systems often face noisy reference audio, imperfect text prompts, multilingual and long-form generation, post-processing, and adversarial perturbations, all of which can weaken robustness. Despite rapid progress in codec-token language models and diffusion-based TTS, robustness under realistic deployment shifts remains underexplored. This "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.00443","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.00443/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.00443","created_at":"2026-05-26T02:04:04.047097+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.00443v2","created_at":"2026-05-26T02:04:04.047097+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.00443","created_at":"2026-05-26T02:04:04.047097+00:00"},{"alias_kind":"pith_short_12","alias_value":"KOT6CL3ANMEU","created_at":"2026-05-26T02:04:04.047097+00:00"},{"alias_kind":"pith_short_16","alias_value":"KOT6CL3ANMEURBU4","created_at":"2026-05-26T02:04:04.047097+00:00"},{"alias_kind":"pith_short_8","alias_value":"KOT6CL3A","created_at":"2026-05-26T02:04:04.047097+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH","json":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH.json","graph_json":"https://pith.science/api/pith-number/KOT6CL3ANMEURBU4IDC2OKRGEH/graph.json","events_json":"https://pith.science/api/pith-number/KOT6CL3ANMEURBU4IDC2OKRGEH/events.json","paper":"https://pith.science/paper/KOT6CL3A"},"agent_actions":{"view_html":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH","download_json":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH.json","view_paper":"https://pith.science/paper/KOT6CL3A","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.00443&json=true","fetch_graph":"https://pith.science/api/pith-number/KOT6CL3ANMEURBU4IDC2OKRGEH/graph.json","fetch_events":"https://pith.science/api/pith-number/KOT6CL3ANMEURBU4IDC2OKRGEH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH/action/storage_attestation","attest_author":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH/action/author_attestation","sign_citation":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH/action/citation_signature","submit_replication":"https://pith.science/pith/KOT6CL3ANMEURBU4IDC2OKRGEH/action/replication_record"}},"created_at":"2026-05-26T02:04:04.047097+00:00","updated_at":"2026-05-26T02:04:04.047097+00:00"}