{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:3WPUQTQJIZHAOYK3PO66XAOE6P","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6f187380462afd5fa2186460f6bcbec84e8edce8cc37704a8da07df152e04c8b","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-02-28T11:32:22Z","title_canon_sha256":"2e3851572313fdaf22e98a95068ff2f1aa52e905b8cc80ce88c09ae802370b60"},"schema_version":"1.0","source":{"id":"2502.20969","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2502.20969","created_at":"2026-05-20T00:05:27Z"},{"alias_kind":"arxiv_version","alias_value":"2502.20969v4","created_at":"2026-05-20T00:05:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.20969","created_at":"2026-05-20T00:05:27Z"},{"alias_kind":"pith_short_12","alias_value":"3WPUQTQJIZHA","created_at":"2026-05-20T00:05:27Z"},{"alias_kind":"pith_short_16","alias_value":"3WPUQTQJIZHAOYK3","created_at":"2026-05-20T00:05:27Z"},{"alias_kind":"pith_short_8","alias_value":"3WPUQTQJ","created_at":"2026-05-20T00:05:27Z"}],"graph_snapshots":[{"event_id":"sha256:65a56984b3f48b7989fdbb5d041849376b5d05f4ea75f10273792c7ba1850be0","target":"graph","created_at":"2026-05-20T00:05:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2502.20969/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Retrieval-augmented generation (RAG) extends large language models (LLMs) with external data sources to enhance factual correctness and domain coverage. Modern RAG pipelines rely on large datastores, creating a significant system challenge: achieving high throughput and low latency is difficult, especially when GPU memory is limited. To address these challenges, we propose TeleRAG, an efficient inference system that reduces latency and improves throughput with minimal GPU memory requirements. The core innovation of TeleRAG is lookahead retrieval, a prefetching mechanism that predicts required ","authors_text":"Arvind Krishnamurthy, Baris Kasikci, Chien-Yu Lin, Kan Zhu, Keisuke Kamahori, Luis Ceze, Madhav Kashyap, Rohan Kadekodi, Rulin Shao, Stephanie Wang, Xiaoxiang Shi, Yile Gu, Yiyu Liu, Zihao Ye","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-02-28T11:32:22Z","title":"TeleRAG: Efficient Retrieval-Augmented Generation Inference with Lookahead Retrieval"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.20969","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ded9c308c4c59cfcf76436170dc2c996f2de3e41daa1b805fb6e36ea2aedd4aa","target":"record","created_at":"2026-05-20T00:05:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6f187380462afd5fa2186460f6bcbec84e8edce8cc37704a8da07df152e04c8b","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-02-28T11:32:22Z","title_canon_sha256":"2e3851572313fdaf22e98a95068ff2f1aa52e905b8cc80ce88c09ae802370b60"},"schema_version":"1.0","source":{"id":"2502.20969","kind":"arxiv","version":4}},"canonical_sha256":"dd9f484e09464e07615b7bbdeb81c4f3cf45ca68288691ff4e3250623c609e87","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"dd9f484e09464e07615b7bbdeb81c4f3cf45ca68288691ff4e3250623c609e87","first_computed_at":"2026-05-20T00:05:27.326682Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:05:27.326682Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"G+6K7W3vvD58Rh1WDhsfcv7YB1wX/1Q3VX6r1ogdBA/9rV6sr5tSvmdFTyaiYJ183NcQELqlls8m9whGjubFCA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:05:27.327598Z","signed_message":"canonical_sha256_bytes"},"source_id":"2502.20969","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ded9c308c4c59cfcf76436170dc2c996f2de3e41daa1b805fb6e36ea2aedd4aa","sha256:65a56984b3f48b7989fdbb5d041849376b5d05f4ea75f10273792c7ba1850be0"],"state_sha256":"e4d68957c8b8620555d64d049c03299eab36edee193cfdd8ce8793651da24192"}