{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:UXRPN65B4IQCHHCASAGX67QO3O","short_pith_number":"pith:UXRPN65B","schema_version":"1.0","canonical_sha256":"a5e2f6fba1e220239c40900d7f7e0edba4a300b8c7afbefbd717ac72fe516224","source":{"kind":"arxiv","id":"2606.06740","version":1},"attestation_state":"computed","paper":{"title":"Multilingual Multi-Speaker Unit Vocoders: A Systematic Analysis of Discrete Speech Representations","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.SD","authors_text":"Adarsh Arigala, Arjun Gangwar, Naman Kothari, S Umesh","submitted_at":"2026-06-04T21:54:56Z","abstract_excerpt":"Discrete speech units obtained via k-means clustering of self supervised embeddings entangle phonetic, speaker, and language information, causing speaker mixing and cross-lingual interference in multilingual multi-speaker speech generation. Despite growing use in Audio LLMs and speech to speech systems, unit vocoders remain underexplored. We analyze a BigVGAN based unit vocoder, across four Indian languages. We study the interaction between cluster size and conditioning strategies using WER, speaker similarity, and unit level metrics. Results show that cluster size governs intelligibility by i"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.06740","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-06-04T21:54:56Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"4a66477b44ed830c782a339ea8efecba060d120ae96cb0426b8105a58f1500ba","abstract_canon_sha256":"7dc7f973eac9e63b8d810e94a6555c892f92eccb9f0a75f4e3ac232155053ae7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T01:04:25.519099Z","signature_b64":"hgqLGr0bvTBPX+T6u/WamffPptYP/gPIuRGKKBj5WFc8uWQcJwo9g4+IlvN6v7RvUXRLeeu8jM6De5nHDDseDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a5e2f6fba1e220239c40900d7f7e0edba4a300b8c7afbefbd717ac72fe516224","last_reissued_at":"2026-06-08T01:04:25.518205Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T01:04:25.518205Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Multilingual Multi-Speaker Unit Vocoders: A Systematic Analysis of Discrete Speech Representations","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.SD","authors_text":"Adarsh Arigala, Arjun Gangwar, Naman Kothari, S Umesh","submitted_at":"2026-06-04T21:54:56Z","abstract_excerpt":"Discrete speech units obtained via k-means clustering of self supervised embeddings entangle phonetic, speaker, and language information, causing speaker mixing and cross-lingual interference in multilingual multi-speaker speech generation. Despite growing use in Audio LLMs and speech to speech systems, unit vocoders remain underexplored. We analyze a BigVGAN based unit vocoder, across four Indian languages. We study the interaction between cluster size and conditioning strategies using WER, speaker similarity, and unit level metrics. Results show that cluster size governs intelligibility by i"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06740","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.06740/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.06740","created_at":"2026-06-08T01:04:25.518382+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.06740v1","created_at":"2026-06-08T01:04:25.518382+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06740","created_at":"2026-06-08T01:04:25.518382+00:00"},{"alias_kind":"pith_short_12","alias_value":"UXRPN65B4IQC","created_at":"2026-06-08T01:04:25.518382+00:00"},{"alias_kind":"pith_short_16","alias_value":"UXRPN65B4IQCHHCA","created_at":"2026-06-08T01:04:25.518382+00:00"},{"alias_kind":"pith_short_8","alias_value":"UXRPN65B","created_at":"2026-06-08T01:04:25.518382+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O","json":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O.json","graph_json":"https://pith.science/api/pith-number/UXRPN65B4IQCHHCASAGX67QO3O/graph.json","events_json":"https://pith.science/api/pith-number/UXRPN65B4IQCHHCASAGX67QO3O/events.json","paper":"https://pith.science/paper/UXRPN65B"},"agent_actions":{"view_html":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O","download_json":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O.json","view_paper":"https://pith.science/paper/UXRPN65B","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.06740&json=true","fetch_graph":"https://pith.science/api/pith-number/UXRPN65B4IQCHHCASAGX67QO3O/graph.json","fetch_events":"https://pith.science/api/pith-number/UXRPN65B4IQCHHCASAGX67QO3O/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O/action/storage_attestation","attest_author":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O/action/author_attestation","sign_citation":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O/action/citation_signature","submit_replication":"https://pith.science/pith/UXRPN65B4IQCHHCASAGX67QO3O/action/replication_record"}},"created_at":"2026-06-08T01:04:25.518382+00:00","updated_at":"2026-06-08T01:04:25.518382+00:00"}