{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:DZFZTF2HEGOF4M4UKWIWWOSYNA","short_pith_number":"pith:DZFZTF2H","canonical_record":{"source":{"id":"2512.19179","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2025-12-22T09:13:40Z","cross_cats_sorted":[],"title_canon_sha256":"46e83ed504e39f28aacaebe79f5a1bf386eb8b999e44ca428ff14b8f22f2e675","abstract_canon_sha256":"57e5ecf6cc9af1f5c74ab57706a6aa8fda8d082ed63d86d51e7b53cf130ccfb3"},"schema_version":"1.0"},"canonical_sha256":"1e4b999747219c5e339455916b3a586811a33a474aff200ab3da14f4175deeb6","source":{"kind":"arxiv","id":"2512.19179","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.19179","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"arxiv_version","alias_value":"2512.19179v3","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.19179","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"pith_short_12","alias_value":"DZFZTF2HEGOF","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"pith_short_16","alias_value":"DZFZTF2HEGOF4M4U","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"pith_short_8","alias_value":"DZFZTF2H","created_at":"2026-05-20T00:00:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:DZFZTF2HEGOF4M4UKWIWWOSYNA","target":"record","payload":{"canonical_record":{"source":{"id":"2512.19179","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2025-12-22T09:13:40Z","cross_cats_sorted":[],"title_canon_sha256":"46e83ed504e39f28aacaebe79f5a1bf386eb8b999e44ca428ff14b8f22f2e675","abstract_canon_sha256":"57e5ecf6cc9af1f5c74ab57706a6aa8fda8d082ed63d86d51e7b53cf130ccfb3"},"schema_version":"1.0"},"canonical_sha256":"1e4b999747219c5e339455916b3a586811a33a474aff200ab3da14f4175deeb6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:31.115718Z","signature_b64":"tYdozPa04pV3Nbx+d37eV9HnRhfF8hfnkH5incVV21QMtqGLUZmfaQT1eZCEU82AG38HZQtEyRWOXBzhvfO8BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1e4b999747219c5e339455916b3a586811a33a474aff200ab3da14f4175deeb6","last_reissued_at":"2026-05-20T00:00:31.114923Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:31.114923Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2512.19179","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CIPTn+IYzkfVXJ0QVZV6ksCxin/F9sGiPE5oUJQ+taDLOWDJqjaF7F1KY/qciG7/lCqiNpT4RnH1zEZtpFmtDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T19:06:40.710497Z"},"content_sha256":"a8aef4804d7c55ee53683c0051f3f99efe3d5e15b1d99ca6e66ae8290b7b9873","schema_version":"1.0","event_id":"sha256:a8aef4804d7c55ee53683c0051f3f99efe3d5e15b1d99ca6e66ae8290b7b9873"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:DZFZTF2HEGOF4M4UKWIWWOSYNA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"CascadeInfer: Length-Aware Scheduling of LLM Serving with Low Latency and Load Balancing","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"2), (2) ScitiX AI), Bohan Zhao (2), Chenqi Zhao (1), Wenfei Wu (1) ((1) Peking University, Yitao Yuan (1, Yongchao He (2), Zane Cao (2)","submitted_at":"2025-12-22T09:13:40Z","abstract_excerpt":"Efficiently harnessing GPU compute is critical to improving user experience and reducing operational costs in large language model (LLM) services. However, current inference engine schedulers overlook the attention backend's sensitivity to request-length heterogeneity within a batch. As state-of-the-art models now support context windows exceeding 128K tokens, this once-tolerable inefficiency has escalated into a primary system bottleneck, causing severe performance degradation through GPU underutilization and increased latency. We present CascadeInfer, a runtime system that dynamically resche"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.19179","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.19179/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yPZ0BfIuH51pjrZiu6aFfwc0wFeL2M0+Wk0rfwUt4/s5atTMJ97K12d3tFZZALMn3APj6bJBetoP+i1y/em5Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T19:06:40.710879Z"},"content_sha256":"ec097d77dd3a5febe62a9199a16b3d8a41edd63689f8614b9ff3c5a217164ca4","schema_version":"1.0","event_id":"sha256:ec097d77dd3a5febe62a9199a16b3d8a41edd63689f8614b9ff3c5a217164ca4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DZFZTF2HEGOF4M4UKWIWWOSYNA/bundle.json","state_url":"https://pith.science/pith/DZFZTF2HEGOF4M4UKWIWWOSYNA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DZFZTF2HEGOF4M4UKWIWWOSYNA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T19:06:40Z","links":{"resolver":"https://pith.science/pith/DZFZTF2HEGOF4M4UKWIWWOSYNA","bundle":"https://pith.science/pith/DZFZTF2HEGOF4M4UKWIWWOSYNA/bundle.json","state":"https://pith.science/pith/DZFZTF2HEGOF4M4UKWIWWOSYNA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DZFZTF2HEGOF4M4UKWIWWOSYNA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:DZFZTF2HEGOF4M4UKWIWWOSYNA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"57e5ecf6cc9af1f5c74ab57706a6aa8fda8d082ed63d86d51e7b53cf130ccfb3","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2025-12-22T09:13:40Z","title_canon_sha256":"46e83ed504e39f28aacaebe79f5a1bf386eb8b999e44ca428ff14b8f22f2e675"},"schema_version":"1.0","source":{"id":"2512.19179","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.19179","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"arxiv_version","alias_value":"2512.19179v3","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.19179","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"pith_short_12","alias_value":"DZFZTF2HEGOF","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"pith_short_16","alias_value":"DZFZTF2HEGOF4M4U","created_at":"2026-05-20T00:00:31Z"},{"alias_kind":"pith_short_8","alias_value":"DZFZTF2H","created_at":"2026-05-20T00:00:31Z"}],"graph_snapshots":[{"event_id":"sha256:ec097d77dd3a5febe62a9199a16b3d8a41edd63689f8614b9ff3c5a217164ca4","target":"graph","created_at":"2026-05-20T00:00:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.19179/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Efficiently harnessing GPU compute is critical to improving user experience and reducing operational costs in large language model (LLM) services. However, current inference engine schedulers overlook the attention backend's sensitivity to request-length heterogeneity within a batch. As state-of-the-art models now support context windows exceeding 128K tokens, this once-tolerable inefficiency has escalated into a primary system bottleneck, causing severe performance degradation through GPU underutilization and increased latency. We present CascadeInfer, a runtime system that dynamically resche","authors_text":"2), (2) ScitiX AI), Bohan Zhao (2), Chenqi Zhao (1), Wenfei Wu (1) ((1) Peking University, Yitao Yuan (1, Yongchao He (2), Zane Cao (2)","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2025-12-22T09:13:40Z","title":"CascadeInfer: Length-Aware Scheduling of LLM Serving with Low Latency and Load Balancing"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.19179","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a8aef4804d7c55ee53683c0051f3f99efe3d5e15b1d99ca6e66ae8290b7b9873","target":"record","created_at":"2026-05-20T00:00:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"57e5ecf6cc9af1f5c74ab57706a6aa8fda8d082ed63d86d51e7b53cf130ccfb3","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2025-12-22T09:13:40Z","title_canon_sha256":"46e83ed504e39f28aacaebe79f5a1bf386eb8b999e44ca428ff14b8f22f2e675"},"schema_version":"1.0","source":{"id":"2512.19179","kind":"arxiv","version":3}},"canonical_sha256":"1e4b999747219c5e339455916b3a586811a33a474aff200ab3da14f4175deeb6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1e4b999747219c5e339455916b3a586811a33a474aff200ab3da14f4175deeb6","first_computed_at":"2026-05-20T00:00:31.114923Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:00:31.114923Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"tYdozPa04pV3Nbx+d37eV9HnRhfF8hfnkH5incVV21QMtqGLUZmfaQT1eZCEU82AG38HZQtEyRWOXBzhvfO8BQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:00:31.115718Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.19179","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a8aef4804d7c55ee53683c0051f3f99efe3d5e15b1d99ca6e66ae8290b7b9873","sha256:ec097d77dd3a5febe62a9199a16b3d8a41edd63689f8614b9ff3c5a217164ca4"],"state_sha256":"adf20f6e47d1c8c8644b30096cde1c877c224d53ef221a82a7c806e4de133dd0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lfvSKc06v9g1z+fF9X8bn/bVbiXhjNwOA5PADs9Zn2bomA4YDsokWTdCCHg+nk2x8HsYwhm1InNPRyD5K4uLAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T19:06:40.713811Z","bundle_sha256":"ff4f383263a2bfad83e5ee71437eb3893f97ee32a6e403c4510625d2b8c25388"}}