{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:ZZJCXPKS6SRF5QOFNHOAVAYUL4","short_pith_number":"pith:ZZJCXPKS","schema_version":"1.0","canonical_sha256":"ce522bbd52f4a25ec1c569dc0a83145f02e85bc2a0252fe4fd5ea87b3f0b6364","source":{"kind":"arxiv","id":"2512.20211","version":2},"attestation_state":"computed","paper":{"title":"Aliasing-Free Neural Audio Synthesis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Differentiable anti-aliasing modules in neural vocoders and codecs remove artifacts to boost music and singing synthesis.","cross_cats":["eess.AS","eess.SP"],"primary_cat":"cs.SD","authors_text":"Chaoren Wang, Jerry Li, Junan Zhang, Lauri Juvela, Yicheng Gu, Zhizheng Wu","submitted_at":"2025-12-23T10:04:48Z","abstract_excerpt":"In neural audio synthesis, neural vocoders and codecs are models that reconstruct waveforms from acoustic and latent representations, which are essential to the resulting audio quality. While current models are capable of generating perceptually natural speech, they still struggle with high-fidelity music and singing voice synthesis, as severe aliasing artifacts are introduced by non-linear activation functions and upsampling layers in existing architectures. Although various anti-aliasing techniques have been proposed in digital signal processing, their integration into neural vocoders and co"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":false},"canonical_record":{"source":{"id":"2512.20211","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2025-12-23T10:04:48Z","cross_cats_sorted":["eess.AS","eess.SP"],"title_canon_sha256":"0f999c6eb495e20a94d0602f00d4c7d49a6e85ed152af098e9b1ca0a01956006","abstract_canon_sha256":"20c6bd67582d7d70363a5c554ca22afada52ad9e3adfe665876b4b939c453cba"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:44:32.065235Z","signature_b64":"Sc5kTpozYbjJCm+TFNZtqcX+GH+6rfcOYcDl7S2BzsEbHVyd7+2vfpsd1sv6cugUgfg1OFob8WwcKuRR5fcbCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ce522bbd52f4a25ec1c569dc0a83145f02e85bc2a0252fe4fd5ea87b3f0b6364","last_reissued_at":"2026-05-18T02:44:32.064541Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:44:32.064541Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Aliasing-Free Neural Audio Synthesis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Differentiable anti-aliasing modules in neural vocoders and codecs remove artifacts to boost music and singing synthesis.","cross_cats":["eess.AS","eess.SP"],"primary_cat":"cs.SD","authors_text":"Chaoren Wang, Jerry Li, Junan Zhang, Lauri Juvela, Yicheng Gu, Zhizheng Wu","submitted_at":"2025-12-23T10:04:48Z","abstract_excerpt":"In neural audio synthesis, neural vocoders and codecs are models that reconstruct waveforms from acoustic and latent representations, which are essential to the resulting audio quality. While current models are capable of generating perceptually natural speech, they still struggle with high-fidelity music and singing voice synthesis, as severe aliasing artifacts are introduced by non-linear activation functions and upsampling layers in existing architectures. Although various anti-aliasing techniques have been proposed in digital signal processing, their integration into neural vocoders and co"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Pupu-Vocoder and Pupu-Codec outperform existing systems on singing voice, music, and audio, while achieving comparable performance on speech.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the differentiable anti-aliasing modules can be inserted into standard neural vocoder and codec architectures without introducing new training instabilities or quality trade-offs that would negate the reported gains.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Pupu-Vocoder and Pupu-Codec integrate differentiable anti-aliasing into neural audio models to eliminate aliasing artifacts from non-linear activations and upsampling, yielding better results on music and singing voice.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Differentiable anti-aliasing modules in neural vocoders and codecs remove artifacts to boost music and singing synthesis.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"38340d3f845eda0db586faf042ff7ca319ee7b1e6139140c2e1a39e7189e9086"},"source":{"id":"2512.20211","kind":"arxiv","version":2},"verdict":{"id":"0a6318e0-87e2-4168-8f30-38dcf1c554ec","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T20:32:07.708294Z","strongest_claim":"Pupu-Vocoder and Pupu-Codec outperform existing systems on singing voice, music, and audio, while achieving comparable performance on speech.","one_line_summary":"Pupu-Vocoder and Pupu-Codec integrate differentiable anti-aliasing into neural audio models to eliminate aliasing artifacts from non-linear activations and upsampling, yielding better results on music and singing voice.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the differentiable anti-aliasing modules can be inserted into standard neural vocoder and codec architectures without introducing new training instabilities or quality trade-offs that would negate the reported gains.","pith_extraction_headline":"Differentiable anti-aliasing modules in neural vocoders and codecs remove artifacts to boost music and singing synthesis."},"references":{"count":102,"sample":[{"doi":"","year":2025,"title":"MaskGCT: Zero-Shot Text-to-Speech with Masked Generative Codec Transformer,","work_id":"7f395e27-1680-427e-85ec-feb0fa778517","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Vevo: Controllable Zero-Shot V oice Imitation with Self-Supervised Disentanglement,","work_id":"6e874114-8eb4-4847-9d3c-faffc8c225c7","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Neural Codec Language Models are Zero-Shot Text to Speech Synthesizers","work_id":"de8fb688-dd63-4942-92f6-66d98d5b6db2","ref_index":3,"cited_arxiv_id":"2301.02111","is_internal_anchor":true},{"doi":"","year":2018,"title":"Efficient Neural Audio Synthe- sis,","work_id":"dd1f2d29-2d5c-4908-8977-381f3aa85ca0","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2019,"title":"LPCNet: Improving Neural Speech Synthesis through Linear Prediction,","work_id":"eaa422fc-9dae-4c50-bdd5-3369741caaf9","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":102,"snapshot_sha256":"aab8299c10b3bff11b77014e5173cd97a7e79e57eaad4ae8982796f6ef02f2e0","internal_anchors":2},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2512.20211","created_at":"2026-05-18T02:44:32.064658+00:00"},{"alias_kind":"arxiv_version","alias_value":"2512.20211v2","created_at":"2026-05-18T02:44:32.064658+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.20211","created_at":"2026-05-18T02:44:32.064658+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZZJCXPKS6SRF","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZZJCXPKS6SRF5QOF","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZZJCXPKS","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4","json":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4.json","graph_json":"https://pith.science/api/pith-number/ZZJCXPKS6SRF5QOFNHOAVAYUL4/graph.json","events_json":"https://pith.science/api/pith-number/ZZJCXPKS6SRF5QOFNHOAVAYUL4/events.json","paper":"https://pith.science/paper/ZZJCXPKS"},"agent_actions":{"view_html":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4","download_json":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4.json","view_paper":"https://pith.science/paper/ZZJCXPKS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2512.20211&json=true","fetch_graph":"https://pith.science/api/pith-number/ZZJCXPKS6SRF5QOFNHOAVAYUL4/graph.json","fetch_events":"https://pith.science/api/pith-number/ZZJCXPKS6SRF5QOFNHOAVAYUL4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4/action/storage_attestation","attest_author":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4/action/author_attestation","sign_citation":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4/action/citation_signature","submit_replication":"https://pith.science/pith/ZZJCXPKS6SRF5QOFNHOAVAYUL4/action/replication_record"}},"created_at":"2026-05-18T02:44:32.064658+00:00","updated_at":"2026-05-18T02:44:32.064658+00:00"}