{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:CAMJIUCJJ5FGTYDOR3GCYNLLZ2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"06f08c7abc09a7e640530fc942dd160a722b054c14decb6ad49df7a6b74c1991","cross_cats_sorted":["eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-07-02T05:42:01Z","title_canon_sha256":"a62e516a3965a9ffb745d30b1748dc17a58b1edbb19828a7c97d693471721e9b"},"schema_version":"1.0","source":{"id":"2607.01733","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2607.01733","created_at":"2026-07-03T01:17:28Z"},{"alias_kind":"arxiv_version","alias_value":"2607.01733v1","created_at":"2026-07-03T01:17:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.01733","created_at":"2026-07-03T01:17:28Z"},{"alias_kind":"pith_short_12","alias_value":"CAMJIUCJJ5FG","created_at":"2026-07-03T01:17:28Z"},{"alias_kind":"pith_short_16","alias_value":"CAMJIUCJJ5FGTYDO","created_at":"2026-07-03T01:17:28Z"},{"alias_kind":"pith_short_8","alias_value":"CAMJIUCJ","created_at":"2026-07-03T01:17:28Z"}],"graph_snapshots":[{"event_id":"sha256:dd00df03a9cc6f48b7699609919eb58fd72b262919bff8ef4b011657df74122f","target":"graph","created_at":"2026-07-03T01:17:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2607.01733/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Speech-LLM integration has shown promising results by leveraging extensive textual pretraining, yet its specific benefits for automatic speech recognition (ASR) remain unclear. We observe that as supervised ASR training data increases, the contribution of LLM priors becomes less evident, and simple speech-text joint training under-utilizes textual knowledge. We therefore propose Joint Speech-Text Interleaved Pretraining (JSTIP), an ASR-oriented pretraining strategy that constructs word-level and segment-level interleaved speech-text sequences within aligned pairs for speech-LLM architectures t","authors_text":"Ali Zare, Bo Ren, Jinyu Li, Junkun Chen, Keqi Deng, Liliang Ren, Ruchao Fan, Rui Zhao, Xiaoyang Chen, Yan Huang, Yelong Shen, Yiming Wang, Yuxuan Hu","cross_cats":["eess.AS"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-07-02T05:42:01Z","title":"Rethinking Speech-LLM Integration for ASR: Effective Joint Speech-Text Training by Interleaving"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.01733","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:641ff370da20ac7c4499a6ce99e798658dbe0431c97274c0238f6432df5da1be","target":"record","created_at":"2026-07-03T01:17:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"06f08c7abc09a7e640530fc942dd160a722b054c14decb6ad49df7a6b74c1991","cross_cats_sorted":["eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-07-02T05:42:01Z","title_canon_sha256":"a62e516a3965a9ffb745d30b1748dc17a58b1edbb19828a7c97d693471721e9b"},"schema_version":"1.0","source":{"id":"2607.01733","kind":"arxiv","version":1}},"canonical_sha256":"10189450494f4a69e06e8ecc2c356bce970f78088b865be1150001d678c717aa","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"10189450494f4a69e06e8ecc2c356bce970f78088b865be1150001d678c717aa","first_computed_at":"2026-07-03T01:17:28.092501Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-03T01:17:28.092501Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"W+qgixF4cwVZZQHy/BUgzVBPG/oxy5UMsuPPJhskkg98zrdrCG4uwII57d4ufYnvZ6OYSv9iRPztKJTrWbC6BA==","signature_status":"signed_v1","signed_at":"2026-07-03T01:17:28.092924Z","signed_message":"canonical_sha256_bytes"},"source_id":"2607.01733","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:641ff370da20ac7c4499a6ce99e798658dbe0431c97274c0238f6432df5da1be","sha256:dd00df03a9cc6f48b7699609919eb58fd72b262919bff8ef4b011657df74122f"],"state_sha256":"4c3d2e7fcf0349066f27de20b6ec80e5cf65d11cdddaec23d7d8eaa13729fead"}