{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:LINIHV6BSGVOALVGZL4NEWWHPX","short_pith_number":"pith:LINIHV6B","schema_version":"1.0","canonical_sha256":"5a1a83d7c191aae02ea6caf8d25ac77dfe80eb60d240d75e9b2d11fbe36b3058","source":{"kind":"arxiv","id":"2605.04842","version":2},"attestation_state":"computed","paper":{"title":"Communication Offloading on SmartNIC DPUs: A Quantitative Approach","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Offloading communication tasks to SmartNIC DPUs speeds up host-dominated workloads by up to 1.55x when the memory-to-communication ratio is high.","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Andong Hu, Ivy Peng, Jacob Wahlgren, Maya Gokhale, Roger Pearce","submitted_at":"2026-05-06T12:41:56Z","abstract_excerpt":"SmartNIC Data Processing Units (DPUs) offer a promising solution for saving high-end CPU resources by offloading tasks to programmable cores near the network interface. In this work, we explore the feasibility of SmartNIC DPUs in supporting an asynchronous communication model called \"fire-and-forget\", particularly its core message routing service. We design a communication offloading engine called Buddy that decouples communication tasks from the application process. Buddy runs flexibly on SmartNIC DPUs such as the Nvidia BlueField-3 DPU and generic x86 CPUs. Our evaluation results in five app"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.04842","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2026-05-06T12:41:56Z","cross_cats_sorted":[],"title_canon_sha256":"12737700caed647788080ecc510f326bb51b326f43ca3327584b4193d8fa81ba","abstract_canon_sha256":"3e2a8bdfe88fd22636149ca85e43a043b6cf65abc654dd239aa4872b51bb6541"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:21.678820Z","signature_b64":"ekPE2IFv4D217qYTRnEqGOEjZPXYNtR3rv4M3Cv85EC6WIZ2he10brBjqsp8zTlf2v5ZZGD08/9213VRLNeJAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5a1a83d7c191aae02ea6caf8d25ac77dfe80eb60d240d75e9b2d11fbe36b3058","last_reissued_at":"2026-05-25T02:01:21.678163Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:21.678163Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Communication Offloading on SmartNIC DPUs: A Quantitative Approach","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Offloading communication tasks to SmartNIC DPUs speeds up host-dominated workloads by up to 1.55x when the memory-to-communication ratio is high.","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Andong Hu, Ivy Peng, Jacob Wahlgren, Maya Gokhale, Roger Pearce","submitted_at":"2026-05-06T12:41:56Z","abstract_excerpt":"SmartNIC Data Processing Units (DPUs) offer a promising solution for saving high-end CPU resources by offloading tasks to programmable cores near the network interface. In this work, we explore the feasibility of SmartNIC DPUs in supporting an asynchronous communication model called \"fire-and-forget\", particularly its core message routing service. We design a communication offloading engine called Buddy that decouples communication tasks from the application process. Buddy runs flexibly on SmartNIC DPUs such as the Nvidia BlueField-3 DPU and generic x86 CPUs. Our evaluation results in five app"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Host-dominated workloads such as Quicksilver and Sparse Matrix Transpose achieved up to 1.55x speedup with communication offloaded to the DPU; we further identify a 625x increase in DRAM traffic due to the absence of Direct Cache Access support on the DPU.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The memory-to-communication ratio serves as a reliable predictor of offloading benefit and that the DPU can execute the Buddy engine with sufficiently low overhead to realize net gains in real applications.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Buddy offloads communication to DPUs for up to 1.55x speedup in host-dominated workloads but causes 625x more DRAM traffic without Direct Cache Access.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Offloading communication tasks to SmartNIC DPUs speeds up host-dominated workloads by up to 1.55x when the memory-to-communication ratio is high.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"560b0e35d0835e65678a14d1b66f9c4ecaf1135ff0b4b8dc4b9d3c62aa595464"},"source":{"id":"2605.04842","kind":"arxiv","version":2},"verdict":{"id":"c33eaadb-e66f-4226-b869-34f06ff3b102","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-08T17:20:32.066893Z","strongest_claim":"Host-dominated workloads such as Quicksilver and Sparse Matrix Transpose achieved up to 1.55x speedup with communication offloaded to the DPU; we further identify a 625x increase in DRAM traffic due to the absence of Direct Cache Access support on the DPU.","one_line_summary":"Buddy offloads communication to DPUs for up to 1.55x speedup in host-dominated workloads but causes 625x more DRAM traffic without Direct Cache Access.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The memory-to-communication ratio serves as a reliable predictor of offloading benefit and that the DPU can execute the Buddy engine with sufficiently low overhead to realize net gains in real applications.","pith_extraction_headline":"Offloading communication tasks to SmartNIC DPUs speeds up host-dominated workloads by up to 1.55x when the memory-to-communication ratio is high."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.04842/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-20T11:33:29.170906Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T22:01:28.913964Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T14:06:41.923187Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"0337c9dea7c1a2585218ea0421cc4bdc32dbea94686d9aac2015a380ed31d609"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.04842","created_at":"2026-05-25T02:01:21.678263+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.04842v2","created_at":"2026-05-25T02:01:21.678263+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.04842","created_at":"2026-05-25T02:01:21.678263+00:00"},{"alias_kind":"pith_short_12","alias_value":"LINIHV6BSGVO","created_at":"2026-05-25T02:01:21.678263+00:00"},{"alias_kind":"pith_short_16","alias_value":"LINIHV6BSGVOALVG","created_at":"2026-05-25T02:01:21.678263+00:00"},{"alias_kind":"pith_short_8","alias_value":"LINIHV6B","created_at":"2026-05-25T02:01:21.678263+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.07722","citing_title":"Post-Moore Technologies for Plasma Simulation: A Community Roadmap","ref_index":107,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX","json":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX.json","graph_json":"https://pith.science/api/pith-number/LINIHV6BSGVOALVGZL4NEWWHPX/graph.json","events_json":"https://pith.science/api/pith-number/LINIHV6BSGVOALVGZL4NEWWHPX/events.json","paper":"https://pith.science/paper/LINIHV6B"},"agent_actions":{"view_html":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX","download_json":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX.json","view_paper":"https://pith.science/paper/LINIHV6B","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.04842&json=true","fetch_graph":"https://pith.science/api/pith-number/LINIHV6BSGVOALVGZL4NEWWHPX/graph.json","fetch_events":"https://pith.science/api/pith-number/LINIHV6BSGVOALVGZL4NEWWHPX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX/action/storage_attestation","attest_author":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX/action/author_attestation","sign_citation":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX/action/citation_signature","submit_replication":"https://pith.science/pith/LINIHV6BSGVOALVGZL4NEWWHPX/action/replication_record"}},"created_at":"2026-05-25T02:01:21.678263+00:00","updated_at":"2026-05-25T02:01:21.678263+00:00"}