{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:52L7GHSY5GUXFAP75LMDCPPQPD","short_pith_number":"pith:52L7GHSY","canonical_record":{"source":{"id":"2605.23463","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-22T10:24:50Z","cross_cats_sorted":[],"title_canon_sha256":"0c00a6dead5d7972a9e0f41fa860b0c7c087270a7d56516d8913d5dfef81fb5d","abstract_canon_sha256":"e856e101451a9df82592cb0456744f781a12f3762ecadba10ff9201be037f2fe"},"schema_version":"1.0"},"canonical_sha256":"ee97f31e58e9a97281ffead8313df078c1afea09956f9d7fa8c4ae743a750983","source":{"kind":"arxiv","id":"2605.23463","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23463","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23463v1","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23463","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"pith_short_12","alias_value":"52L7GHSY5GUX","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"pith_short_16","alias_value":"52L7GHSY5GUXFAP7","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"pith_short_8","alias_value":"52L7GHSY","created_at":"2026-05-25T02:01:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:52L7GHSY5GUXFAP75LMDCPPQPD","target":"record","payload":{"canonical_record":{"source":{"id":"2605.23463","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-22T10:24:50Z","cross_cats_sorted":[],"title_canon_sha256":"0c00a6dead5d7972a9e0f41fa860b0c7c087270a7d56516d8913d5dfef81fb5d","abstract_canon_sha256":"e856e101451a9df82592cb0456744f781a12f3762ecadba10ff9201be037f2fe"},"schema_version":"1.0"},"canonical_sha256":"ee97f31e58e9a97281ffead8313df078c1afea09956f9d7fa8c4ae743a750983","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:56.116432Z","signature_b64":"iYHmPp0NnmtEnT+toh29l2o1kVlam+49x6x5j6qbxL71l5h5LlQP4+tjh1vhmKTPsoiRTjNcLADsWc+K6ONuAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ee97f31e58e9a97281ffead8313df078c1afea09956f9d7fa8c4ae743a750983","last_reissued_at":"2026-05-25T02:01:56.115619Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:56.115619Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.23463","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2hIc8Ud7fl1SDsKeILvbW5FcAT17eBC3vjqM4VI3t1r/mMgoU9vB2ekM9K2GKy3mtM/NSAXyLwcbpQQuYamtBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T18:16:16.676510Z"},"content_sha256":"11f65ee12c8c90aff4b5998488e4c15c39f76d7d716c87a7e273765d0ac667f0","schema_version":"1.0","event_id":"sha256:11f65ee12c8c90aff4b5998488e4c15c39f76d7d716c87a7e273765d0ac667f0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:52L7GHSY5GUXFAP75LMDCPPQPD","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"StepAudio 2.5 Technical Report","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"eess.AS","authors_text":"Bin Lin, Boyong Wu, Bo Zhao, Brian Li, Changlin Zhang, Chang Zeng, Chao Yan, Chen Geng, Chenghao Dong, Chengli Feng, Cheng Yi, Chengyuan Yao, Chen Wu, Daijiao Liu, Danni Wan, Dan Zhou, Daxin Jiang, Di Chen, Die Zhang, Dongqing Pang, Fei Tian, Feng Tian, Future Li, Gang Yu, Guanglong Yang, Guoqiang Hu, Haiyang Sun, Haoyang Zhang, Huangxi Zhu, Jiangjie Zhen, Jianzheng Gao, Jinghua Liang, Jinglan Gong, Jinmei Wan, Jun Chen, Junjie Yuan, Kang An, Lei Lei, Limin Zhong, Li Xie, Lun Cai, Mengqiang Ren, Mingliang Li, Mingxiao Li, Min Xu, Na Wang, Peilin Li, Pengfei Tan, Peng Yang, Qiang Tong, Qiaoling Huang, Qingfu Du, Qingjian Lin, Rui Wang, Runze Li, Shengchen Zhou, Shenghua Hu, Shihao Peng, Shiliang Yang, Shi Qiu, Siqi Tu, Siyi Zhou, Tianjiao Deng, Ting Xu, Tong Wang, WeiMing Niu, Wenwen Qu, Wuxun Xie, Xiangyu Li, Xiangyu Tony Zhang, Xiangyu Zhang, Xianwei Zhang, Xianyu Feng, Xiaojia Liu, Xing Chen, Xiongbin Wu, Xuerui Yang, Yang Li, Yang Yang, Yan Wu, Yechang Huang, Yibo Zhu, Yifan Zhang, Yile Liu, Yi Liu, Yongshen Long, Yuanhao Ding, Yuchu Luo, Yu Fu, Yuhao Wang, Yuhe Yin, Yu Luo, Yunfang Xu, Yuxiang Yang, Yuxin Li, Yuxin Zhang, Zhengyan Sheng, Zhiguo Huang, Zhiyue Wu, Zichao Li, Zichao Zhou","submitted_at":"2026-05-22T10:24:50Z","abstract_excerpt":"Unified audio-language modeling has emerged as a prominent trend in modern speech systems, promising to bring the reasoning capabilities of large language models to auditory tasks. However, existing unified foundations often struggle to match the depth of specialized systems across automatic speech recognition (ASR), text-to-speech synthesis (TTS), and realtime spoken interaction. Bridging this gap remains an open challenge. This report presents StepAudio 2.5, a unified audio-language foundation model that matches or exceeds specialized systems across all three capabilities. Rather than treati"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23463","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.23463/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:56Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qf/egR5b/rVjC4I6Pa0UvVYgZNzvesGce8lb7ePFnJ55aUtp8zGu7sMj2J6tC60eepKBARm4C4ep4YCNosVGAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T18:16:16.677339Z"},"content_sha256":"461b36a521332bdcea98cc71399df94d94ab440976235509f02c0b8173204f15","schema_version":"1.0","event_id":"sha256:461b36a521332bdcea98cc71399df94d94ab440976235509f02c0b8173204f15"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/52L7GHSY5GUXFAP75LMDCPPQPD/bundle.json","state_url":"https://pith.science/pith/52L7GHSY5GUXFAP75LMDCPPQPD/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/52L7GHSY5GUXFAP75LMDCPPQPD/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T18:16:16Z","links":{"resolver":"https://pith.science/pith/52L7GHSY5GUXFAP75LMDCPPQPD","bundle":"https://pith.science/pith/52L7GHSY5GUXFAP75LMDCPPQPD/bundle.json","state":"https://pith.science/pith/52L7GHSY5GUXFAP75LMDCPPQPD/state.json","well_known_bundle":"https://pith.science/.well-known/pith/52L7GHSY5GUXFAP75LMDCPPQPD/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:52L7GHSY5GUXFAP75LMDCPPQPD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e856e101451a9df82592cb0456744f781a12f3762ecadba10ff9201be037f2fe","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-22T10:24:50Z","title_canon_sha256":"0c00a6dead5d7972a9e0f41fa860b0c7c087270a7d56516d8913d5dfef81fb5d"},"schema_version":"1.0","source":{"id":"2605.23463","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23463","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23463v1","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23463","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"pith_short_12","alias_value":"52L7GHSY5GUX","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"pith_short_16","alias_value":"52L7GHSY5GUXFAP7","created_at":"2026-05-25T02:01:56Z"},{"alias_kind":"pith_short_8","alias_value":"52L7GHSY","created_at":"2026-05-25T02:01:56Z"}],"graph_snapshots":[{"event_id":"sha256:461b36a521332bdcea98cc71399df94d94ab440976235509f02c0b8173204f15","target":"graph","created_at":"2026-05-25T02:01:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.23463/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Unified audio-language modeling has emerged as a prominent trend in modern speech systems, promising to bring the reasoning capabilities of large language models to auditory tasks. However, existing unified foundations often struggle to match the depth of specialized systems across automatic speech recognition (ASR), text-to-speech synthesis (TTS), and realtime spoken interaction. Bridging this gap remains an open challenge. This report presents StepAudio 2.5, a unified audio-language foundation model that matches or exceeds specialized systems across all three capabilities. Rather than treati","authors_text":"Bin Lin, Boyong Wu, Bo Zhao, Brian Li, Changlin Zhang, Chang Zeng, Chao Yan, Chen Geng, Chenghao Dong, Chengli Feng, Cheng Yi, Chengyuan Yao, Chen Wu, Daijiao Liu, Danni Wan, Dan Zhou, Daxin Jiang, Di Chen, Die Zhang, Dongqing Pang, Fei Tian, Feng Tian, Future Li, Gang Yu, Guanglong Yang, Guoqiang Hu, Haiyang Sun, Haoyang Zhang, Huangxi Zhu, Jiangjie Zhen, Jianzheng Gao, Jinghua Liang, Jinglan Gong, Jinmei Wan, Jun Chen, Junjie Yuan, Kang An, Lei Lei, Limin Zhong, Li Xie, Lun Cai, Mengqiang Ren, Mingliang Li, Mingxiao Li, Min Xu, Na Wang, Peilin Li, Pengfei Tan, Peng Yang, Qiang Tong, Qiaoling Huang, Qingfu Du, Qingjian Lin, Rui Wang, Runze Li, Shengchen Zhou, Shenghua Hu, Shihao Peng, Shiliang Yang, Shi Qiu, Siqi Tu, Siyi Zhou, Tianjiao Deng, Ting Xu, Tong Wang, WeiMing Niu, Wenwen Qu, Wuxun Xie, Xiangyu Li, Xiangyu Tony Zhang, Xiangyu Zhang, Xianwei Zhang, Xianyu Feng, Xiaojia Liu, Xing Chen, Xiongbin Wu, Xuerui Yang, Yang Li, Yang Yang, Yan Wu, Yechang Huang, Yibo Zhu, Yifan Zhang, Yile Liu, Yi Liu, Yongshen Long, Yuanhao Ding, Yuchu Luo, Yu Fu, Yuhao Wang, Yuhe Yin, Yu Luo, Yunfang Xu, Yuxiang Yang, Yuxin Li, Yuxin Zhang, Zhengyan Sheng, Zhiguo Huang, Zhiyue Wu, Zichao Li, Zichao Zhou","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-22T10:24:50Z","title":"StepAudio 2.5 Technical Report"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23463","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:11f65ee12c8c90aff4b5998488e4c15c39f76d7d716c87a7e273765d0ac667f0","target":"record","created_at":"2026-05-25T02:01:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e856e101451a9df82592cb0456744f781a12f3762ecadba10ff9201be037f2fe","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-22T10:24:50Z","title_canon_sha256":"0c00a6dead5d7972a9e0f41fa860b0c7c087270a7d56516d8913d5dfef81fb5d"},"schema_version":"1.0","source":{"id":"2605.23463","kind":"arxiv","version":1}},"canonical_sha256":"ee97f31e58e9a97281ffead8313df078c1afea09956f9d7fa8c4ae743a750983","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ee97f31e58e9a97281ffead8313df078c1afea09956f9d7fa8c4ae743a750983","first_computed_at":"2026-05-25T02:01:56.115619Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:01:56.115619Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"iYHmPp0NnmtEnT+toh29l2o1kVlam+49x6x5j6qbxL71l5h5LlQP4+tjh1vhmKTPsoiRTjNcLADsWc+K6ONuAw==","signature_status":"signed_v1","signed_at":"2026-05-25T02:01:56.116432Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.23463","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:11f65ee12c8c90aff4b5998488e4c15c39f76d7d716c87a7e273765d0ac667f0","sha256:461b36a521332bdcea98cc71399df94d94ab440976235509f02c0b8173204f15"],"state_sha256":"cda07f3825a6b1932eadddbe05c7fdd95fc83ae3ff5e7c20b387641daaabf58e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Wmj0+SpgjJzzuB4CHbePUSCdaOVPUGEbxw/2VMWxOv+McmG6kPfxBZzD+qSIDD1NxjA9t0McteEpQYROf0ZaBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T18:16:16.681266Z","bundle_sha256":"5ef227a7e09e6d548fe4a24062a2236a946691e5aacb1c5c4ecb91f589bdaa3b"}}