{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:K7GCZWEWIH3HOGX5K7WVXGZQJI","short_pith_number":"pith:K7GCZWEW","schema_version":"1.0","canonical_sha256":"57cc2cd89641f6771afd57ed5b9b304a3af49edc738d4f3177f2fa6b92cac1e2","source":{"kind":"arxiv","id":"2606.01839","version":1},"attestation_state":"computed","paper":{"title":"Observation, Not Prediction: Conversation-Level Disaggregated Scheduling for Agentic Serving","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AR","cs.LG"],"primary_cat":"cs.DC","authors_text":"Henry Hoffmann, Jianru Ding, Mingyuan Xiang, Pouya Mahdi Gholami, Ryien Hosseini","submitted_at":"2026-06-01T07:51:09Z","abstract_excerpt":"LLM-based agents resolve a user task through many turns of dependent inference and tool calls, producing a workload whose total cost is unknown when the task arrives. Existing multi-turn systems keep the turn as the scheduling unit and decide, turn by turn, whether to disaggregate prefill from decode. That decision rests on the turn's decode length, tool behavior, and KV growth, quantities that are not observable when the scheduler must act, forcing the system to predict them. We show this dependence on prediction is imposed by the scheduling unit, not the workload. Raising the scheduling unit"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.01839","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DC","submitted_at":"2026-06-01T07:51:09Z","cross_cats_sorted":["cs.AR","cs.LG"],"title_canon_sha256":"4b1a3491fcc75013205e29596d22e4345548213c71afc05fa66ae711c1f01256","abstract_canon_sha256":"07774841b73cee11213ad689c4d29aca458b875653e38ed384fd04c80f3f6ea8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T02:04:58.208857Z","signature_b64":"maqFOXX3OUm+hYJkVJbqLKRWAKBYxzLFBh4/jAGW/5CP6JLHTahLVXS6PBPa59mCL4xZD1RpacswXAHM9d+tDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"57cc2cd89641f6771afd57ed5b9b304a3af49edc738d4f3177f2fa6b92cac1e2","last_reissued_at":"2026-06-02T02:04:58.208482Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T02:04:58.208482Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Observation, Not Prediction: Conversation-Level Disaggregated Scheduling for Agentic Serving","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AR","cs.LG"],"primary_cat":"cs.DC","authors_text":"Henry Hoffmann, Jianru Ding, Mingyuan Xiang, Pouya Mahdi Gholami, Ryien Hosseini","submitted_at":"2026-06-01T07:51:09Z","abstract_excerpt":"LLM-based agents resolve a user task through many turns of dependent inference and tool calls, producing a workload whose total cost is unknown when the task arrives. Existing multi-turn systems keep the turn as the scheduling unit and decide, turn by turn, whether to disaggregate prefill from decode. That decision rests on the turn's decode length, tool behavior, and KV growth, quantities that are not observable when the scheduler must act, forcing the system to predict them. We show this dependence on prediction is imposed by the scheduling unit, not the workload. Raising the scheduling unit"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.01839","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.01839/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.01839","created_at":"2026-06-02T02:04:58.208538+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.01839v1","created_at":"2026-06-02T02:04:58.208538+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.01839","created_at":"2026-06-02T02:04:58.208538+00:00"},{"alias_kind":"pith_short_12","alias_value":"K7GCZWEWIH3H","created_at":"2026-06-02T02:04:58.208538+00:00"},{"alias_kind":"pith_short_16","alias_value":"K7GCZWEWIH3HOGX5","created_at":"2026-06-02T02:04:58.208538+00:00"},{"alias_kind":"pith_short_8","alias_value":"K7GCZWEW","created_at":"2026-06-02T02:04:58.208538+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI","json":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI.json","graph_json":"https://pith.science/api/pith-number/K7GCZWEWIH3HOGX5K7WVXGZQJI/graph.json","events_json":"https://pith.science/api/pith-number/K7GCZWEWIH3HOGX5K7WVXGZQJI/events.json","paper":"https://pith.science/paper/K7GCZWEW"},"agent_actions":{"view_html":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI","download_json":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI.json","view_paper":"https://pith.science/paper/K7GCZWEW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.01839&json=true","fetch_graph":"https://pith.science/api/pith-number/K7GCZWEWIH3HOGX5K7WVXGZQJI/graph.json","fetch_events":"https://pith.science/api/pith-number/K7GCZWEWIH3HOGX5K7WVXGZQJI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI/action/storage_attestation","attest_author":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI/action/author_attestation","sign_citation":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI/action/citation_signature","submit_replication":"https://pith.science/pith/K7GCZWEWIH3HOGX5K7WVXGZQJI/action/replication_record"}},"created_at":"2026-06-02T02:04:58.208538+00:00","updated_at":"2026-06-02T02:04:58.208538+00:00"}