{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:DXFDO6JMX7ZMDQ4BVRGCXKJFQB","short_pith_number":"pith:DXFDO6JM","canonical_record":{"source":{"id":"2605.01708","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-03T04:22:51Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"a600cd2c62402b48a1aef292d2a03cb0951e2a728aac1f0c8a528c9756aebb87","abstract_canon_sha256":"f155781a755c59160d66c77c6c53ce1bb7a5bcaa772ab84b0ab7303920158ce4"},"schema_version":"1.0"},"canonical_sha256":"1dca37792cbff2c1c381ac4c2ba925805b9af493c0386e681a0230b4b868b621","source":{"kind":"arxiv","id":"2605.01708","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.01708","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.01708v3","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.01708","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"pith_short_12","alias_value":"DXFDO6JMX7ZM","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"pith_short_16","alias_value":"DXFDO6JMX7ZMDQ4B","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"pith_short_8","alias_value":"DXFDO6JM","created_at":"2026-06-25T00:18:14Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:DXFDO6JMX7ZMDQ4BVRGCXKJFQB","target":"record","payload":{"canonical_record":{"source":{"id":"2605.01708","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-03T04:22:51Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"a600cd2c62402b48a1aef292d2a03cb0951e2a728aac1f0c8a528c9756aebb87","abstract_canon_sha256":"f155781a755c59160d66c77c6c53ce1bb7a5bcaa772ab84b0ab7303920158ce4"},"schema_version":"1.0"},"canonical_sha256":"1dca37792cbff2c1c381ac4c2ba925805b9af493c0386e681a0230b4b868b621","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-25T00:18:14.094630Z","signature_b64":"FpRlp+oAANBdWqmwjul4AGyUkLrvFec1bF+4VkcsEiZ1IP9mPKsqpGv5uG48KaoCbXuw1/u5XAXI30emkbLEAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1dca37792cbff2c1c381ac4c2ba925805b9af493c0386e681a0230b4b868b621","last_reissued_at":"2026-06-25T00:18:14.094134Z","signature_status":"signed_v1","first_computed_at":"2026-06-25T00:18:14.094134Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.01708","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-25T00:18:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iYzxtE6t+c63razrxu7niUA3UskHhs0DPjhdKowDrP6+0ZQ/eM6L9a5s0uX3AGv+qVI3oK9Kb2qIhlpkuYf/DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T05:54:05.449592Z"},"content_sha256":"4650c14e3bb0669916ca9ab9dfb6ed3c9c9742189d18ce7b93384aaf38a27042","schema_version":"1.0","event_id":"sha256:4650c14e3bb0669916ca9ab9dfb6ed3c9c9742189d18ce7b93384aaf38a27042"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:DXFDO6JMX7ZMDQ4BVRGCXKJFQB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SplitZip: Ultra Fast Lossless KV Compression for Disaggregated LLM Serving","license":"http://creativecommons.org/licenses/by/4.0/","headline":"SplitZip achieves over 600 GB/s lossless KV cache compression on GPUs by encoding frequent exponents with fixed-length codes and routing rare ones through a sparse escape stream.","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.DC","authors_text":"Siddharth Joshi, Yipin Guo","submitted_at":"2026-05-03T04:22:51Z","abstract_excerpt":"Contemporary systems serving large language models (LLMs) have adopted prefill-decode disaggregation to load-balance between the compute-bound prefill phase and the memory-bound decode phase. Under this design, prefill workers generate a KV cache that must be transferred to decode workers before generation can begin. With these workers residing on different physical systems, this transfer becomes a significant bottleneck to serving LLMs at scale, especially for long-input and agentic workloads. Existing lossless codecs are unsuitable here as they primarily target offline weight compression, ru"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"SplitZip achieves 613.3 GB/s compression throughput and 2181.8 GB/s decompression throughput on real BF16 activation tensors, providing up to 1.32× speedup for BF16 KV cache transfer, 1.30× speedup for TTFT, and 1.23× increase on Request Throughput.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The assumption that an offline calibrated top-16 exponent codebook will effectively capture the distribution of exponents in online KV activations during prefill without significant loss in compression ratio or speed, and that the method integrates seamlessly into existing serving frameworks.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SplitZip is a new GPU-friendly lossless compressor for KV cache tensors that exploits exponent redundancy to achieve over 600 GB/s compression throughput and up to 1.32x faster transfers in disaggregated LLM serving.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"SplitZip achieves over 600 GB/s lossless KV cache compression on GPUs by encoding frequent exponents with fixed-length codes and routing rare ones through a sparse escape stream.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"acf6829146e8686b4757c2b8a87985816d970f87bdf56b53c37f11c7bc90751d"},"source":{"id":"2605.01708","kind":"arxiv","version":3},"verdict":{"id":"46123149-2552-41a3-b020-0f5c380c5802","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-12T01:10:11.164145Z","strongest_claim":"SplitZip achieves 613.3 GB/s compression throughput and 2181.8 GB/s decompression throughput on real BF16 activation tensors, providing up to 1.32× speedup for BF16 KV cache transfer, 1.30× speedup for TTFT, and 1.23× increase on Request Throughput.","one_line_summary":"SplitZip is a new GPU-friendly lossless compressor for KV cache tensors that exploits exponent redundancy to achieve over 600 GB/s compression throughput and up to 1.32x faster transfers in disaggregated LLM serving.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The assumption that an offline calibrated top-16 exponent codebook will effectively capture the distribution of exponents in online KV activations during prefill without significant loss in compression ratio or speed, and that the method integrates seamlessly into existing serving frameworks.","pith_extraction_headline":"SplitZip achieves over 600 GB/s lossless KV cache compression on GPUs by encoding frequent exponents with fixed-length codes and routing rare ones through a sparse escape stream."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.01708/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-20T17:37:55.769586Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-20T05:01:23.276394Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T17:01:14.480297Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"1638d3400438f90dda5de29532e62ee4b0160ca666d25feed15538e353ea0384"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"e4b67163d9e6070a7810bb8b5fff3f3b2b03f60db21d1a1e6e804fdfbd83d093"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"46123149-2552-41a3-b020-0f5c380c5802"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-25T00:18:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"p8I3DyDiJaAzYyQGJVO1uZE7R56V2+ty58hhYkPXOaEsG0Ju3UO6z/AIbX17v1ObgobrifTLJyn0SL8cdlMDCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T05:54:05.450418Z"},"content_sha256":"bcde20a1c7dfee966c8ee01e7f8aef9c512590b4a953da65a56d061e04568376","schema_version":"1.0","event_id":"sha256:bcde20a1c7dfee966c8ee01e7f8aef9c512590b4a953da65a56d061e04568376"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DXFDO6JMX7ZMDQ4BVRGCXKJFQB/bundle.json","state_url":"https://pith.science/pith/DXFDO6JMX7ZMDQ4BVRGCXKJFQB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DXFDO6JMX7ZMDQ4BVRGCXKJFQB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-28T05:54:05Z","links":{"resolver":"https://pith.science/pith/DXFDO6JMX7ZMDQ4BVRGCXKJFQB","bundle":"https://pith.science/pith/DXFDO6JMX7ZMDQ4BVRGCXKJFQB/bundle.json","state":"https://pith.science/pith/DXFDO6JMX7ZMDQ4BVRGCXKJFQB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DXFDO6JMX7ZMDQ4BVRGCXKJFQB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DXFDO6JMX7ZMDQ4BVRGCXKJFQB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f155781a755c59160d66c77c6c53ce1bb7a5bcaa772ab84b0ab7303920158ce4","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-03T04:22:51Z","title_canon_sha256":"a600cd2c62402b48a1aef292d2a03cb0951e2a728aac1f0c8a528c9756aebb87"},"schema_version":"1.0","source":{"id":"2605.01708","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.01708","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.01708v3","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.01708","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"pith_short_12","alias_value":"DXFDO6JMX7ZM","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"pith_short_16","alias_value":"DXFDO6JMX7ZMDQ4B","created_at":"2026-06-25T00:18:14Z"},{"alias_kind":"pith_short_8","alias_value":"DXFDO6JM","created_at":"2026-06-25T00:18:14Z"}],"graph_snapshots":[{"event_id":"sha256:bcde20a1c7dfee966c8ee01e7f8aef9c512590b4a953da65a56d061e04568376","target":"graph","created_at":"2026-06-25T00:18:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"SplitZip achieves 613.3 GB/s compression throughput and 2181.8 GB/s decompression throughput on real BF16 activation tensors, providing up to 1.32× speedup for BF16 KV cache transfer, 1.30× speedup for TTFT, and 1.23× increase on Request Throughput."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The assumption that an offline calibrated top-16 exponent codebook will effectively capture the distribution of exponents in online KV activations during prefill without significant loss in compression ratio or speed, and that the method integrates seamlessly into existing serving frameworks."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"SplitZip is a new GPU-friendly lossless compressor for KV cache tensors that exploits exponent redundancy to achieve over 600 GB/s compression throughput and up to 1.32x faster transfers in disaggregated LLM serving."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"SplitZip achieves over 600 GB/s lossless KV cache compression on GPUs by encoding frequent exponents with fixed-length codes and routing rare ones through a sparse escape stream."}],"snapshot_sha256":"acf6829146e8686b4757c2b8a87985816d970f87bdf56b53c37f11c7bc90751d"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"e4b67163d9e6070a7810bb8b5fff3f3b2b03f60db21d1a1e6e804fdfbd83d093"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-20T17:37:55.769586Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-20T05:01:23.276394Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T17:01:14.480297Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.01708/integrity.json","findings":[],"snapshot_sha256":"1638d3400438f90dda5de29532e62ee4b0160ca666d25feed15538e353ea0384","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Contemporary systems serving large language models (LLMs) have adopted prefill-decode disaggregation to load-balance between the compute-bound prefill phase and the memory-bound decode phase. Under this design, prefill workers generate a KV cache that must be transferred to decode workers before generation can begin. With these workers residing on different physical systems, this transfer becomes a significant bottleneck to serving LLMs at scale, especially for long-input and agentic workloads. Existing lossless codecs are unsuitable here as they primarily target offline weight compression, ru","authors_text":"Siddharth Joshi, Yipin Guo","cross_cats":["cs.AI","cs.LG"],"headline":"SplitZip achieves over 600 GB/s lossless KV cache compression on GPUs by encoding frequent exponents with fixed-length codes and routing rare ones through a sparse escape stream.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-03T04:22:51Z","title":"SplitZip: Ultra Fast Lossless KV Compression for Disaggregated LLM Serving"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.01708","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-12T01:10:11.164145Z","id":"46123149-2552-41a3-b020-0f5c380c5802","model_set":{"reader":"grok-4.3"},"one_line_summary":"SplitZip is a new GPU-friendly lossless compressor for KV cache tensors that exploits exponent redundancy to achieve over 600 GB/s compression throughput and up to 1.32x faster transfers in disaggregated LLM serving.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"SplitZip achieves over 600 GB/s lossless KV cache compression on GPUs by encoding frequent exponents with fixed-length codes and routing rare ones through a sparse escape stream.","strongest_claim":"SplitZip achieves 613.3 GB/s compression throughput and 2181.8 GB/s decompression throughput on real BF16 activation tensors, providing up to 1.32× speedup for BF16 KV cache transfer, 1.30× speedup for TTFT, and 1.23× increase on Request Throughput.","weakest_assumption":"The assumption that an offline calibrated top-16 exponent codebook will effectively capture the distribution of exponents in online KV activations during prefill without significant loss in compression ratio or speed, and that the method integrates seamlessly into existing serving frameworks."}},"verdict_id":"46123149-2552-41a3-b020-0f5c380c5802"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4650c14e3bb0669916ca9ab9dfb6ed3c9c9742189d18ce7b93384aaf38a27042","target":"record","created_at":"2026-06-25T00:18:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f155781a755c59160d66c77c6c53ce1bb7a5bcaa772ab84b0ab7303920158ce4","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-05-03T04:22:51Z","title_canon_sha256":"a600cd2c62402b48a1aef292d2a03cb0951e2a728aac1f0c8a528c9756aebb87"},"schema_version":"1.0","source":{"id":"2605.01708","kind":"arxiv","version":3}},"canonical_sha256":"1dca37792cbff2c1c381ac4c2ba925805b9af493c0386e681a0230b4b868b621","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1dca37792cbff2c1c381ac4c2ba925805b9af493c0386e681a0230b4b868b621","first_computed_at":"2026-06-25T00:18:14.094134Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-25T00:18:14.094134Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"FpRlp+oAANBdWqmwjul4AGyUkLrvFec1bF+4VkcsEiZ1IP9mPKsqpGv5uG48KaoCbXuw1/u5XAXI30emkbLEAA==","signature_status":"signed_v1","signed_at":"2026-06-25T00:18:14.094630Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.01708","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4650c14e3bb0669916ca9ab9dfb6ed3c9c9742189d18ce7b93384aaf38a27042","sha256:bcde20a1c7dfee966c8ee01e7f8aef9c512590b4a953da65a56d061e04568376"],"state_sha256":"25ba2cdda76713ea4a886ef992e2f301274b11cadb790818cdd23ecbd5651d7d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"u5wXI4XyEoROSBeV4mtCePbwt7dBlD2Mmy6XTr/S0CPYOYAQ7pBkGtm1i+gzJ7Dm2o7CKlVZ+jsCmAz8TIKWBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-28T05:54:05.452679Z","bundle_sha256":"89726ebaec486b9ffe5ef4e57b07cb29c01ab0859748af8da05fff4efbed4567"}}