{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:HW4JON2WOPUYOXX3QPUELHHWK7","short_pith_number":"pith:HW4JON2W","schema_version":"1.0","canonical_sha256":"3db897375673e9875efb83e8459cf657ec501e0b61786b959a610fcb118a8f31","source":{"kind":"arxiv","id":"1810.05291","version":3},"attestation_state":"computed","paper":{"title":"signSGD with Majority Vote is Communication Efficient And Fault Tolerant","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.DC","authors_text":"Anima Anandkumar, Jeremy Bernstein, Jiawei Zhao, Kamyar Azizzadenesheli","submitted_at":"2018-10-11T23:50:32Z","abstract_excerpt":"Training neural networks on large datasets can be accelerated by distributing the workload over a network of machines. As datasets grow ever larger, networks of hundreds or thousands of machines become economically viable. The time cost of communicating gradients limits the effectiveness of using such large machine counts, as may the increased chance of network faults. We explore a particularly simple algorithm for robust, communication-efficient learning---signSGD. Workers transmit only the sign of their gradient vector to a server, and the overall update is decided by a majority vote. This a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1810.05291","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2018-10-11T23:50:32Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"2bb9c448ab13dcdafaf8d7ad2768aa54195ed00b06c7bbb2c8204a97a1b49427","abstract_canon_sha256":"4130cefec217a905f29df227deddde6fc180dd2ad5cc4aa36517390e060eb799"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:51.935675Z","signature_b64":"Zi2eMwbEri8LgN7S2v4B98u/ME+a5kbQDuIqyFcOyKcLYeXkwHttilx+ewflK3re0fuIia5bMsiY2sWLT2ShAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3db897375673e9875efb83e8459cf657ec501e0b61786b959a610fcb118a8f31","last_reissued_at":"2026-05-17T23:52:51.934893Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:51.934893Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"signSGD with Majority Vote is Communication Efficient And Fault Tolerant","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.DC","authors_text":"Anima Anandkumar, Jeremy Bernstein, Jiawei Zhao, Kamyar Azizzadenesheli","submitted_at":"2018-10-11T23:50:32Z","abstract_excerpt":"Training neural networks on large datasets can be accelerated by distributing the workload over a network of machines. As datasets grow ever larger, networks of hundreds or thousands of machines become economically viable. The time cost of communicating gradients limits the effectiveness of using such large machine counts, as may the increased chance of network faults. We explore a particularly simple algorithm for robust, communication-efficient learning---signSGD. Workers transmit only the sign of their gradient vector to a server, and the overall update is decided by a majority vote. This a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.05291","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1810.05291","created_at":"2026-05-17T23:52:51.935035+00:00"},{"alias_kind":"arxiv_version","alias_value":"1810.05291v3","created_at":"2026-05-17T23:52:51.935035+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.05291","created_at":"2026-05-17T23:52:51.935035+00:00"},{"alias_kind":"pith_short_12","alias_value":"HW4JON2WOPUY","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_16","alias_value":"HW4JON2WOPUYOXX3","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_8","alias_value":"HW4JON2W","created_at":"2026-05-18T12:32:28.185984+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"2606.20344","citing_title":"Quantum ring all-reduce: communication and privacy advantages for distributed learning","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2607.01755","citing_title":"Decentralized Stochastic Subgradient-type Methods with Communication Compression for Nonsmooth Nonconvex Optimization","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19811","citing_title":"LionMuon: Alternating Spectral and Sign Descent for Efficient Training","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16311","citing_title":"SignMuon: Communication-Efficient Distributed Muon Optimization","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19811","citing_title":"LionMuon: Alternating Spectral and Sign Descent for Efficient Training","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2604.09489","citing_title":"XFED: Non-Collusive Model Poisoning Attack Against Byzantine-Robust Federated Classifiers","ref_index":4,"is_internal_anchor":false},{"citing_arxiv_id":"2605.07634","citing_title":"Robust stochastic first order methods in heavy-tailed noise via medoid mini-batch gradient sampling","ref_index":6,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7","json":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7.json","graph_json":"https://pith.science/api/pith-number/HW4JON2WOPUYOXX3QPUELHHWK7/graph.json","events_json":"https://pith.science/api/pith-number/HW4JON2WOPUYOXX3QPUELHHWK7/events.json","paper":"https://pith.science/paper/HW4JON2W"},"agent_actions":{"view_html":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7","download_json":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7.json","view_paper":"https://pith.science/paper/HW4JON2W","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1810.05291&json=true","fetch_graph":"https://pith.science/api/pith-number/HW4JON2WOPUYOXX3QPUELHHWK7/graph.json","fetch_events":"https://pith.science/api/pith-number/HW4JON2WOPUYOXX3QPUELHHWK7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7/action/storage_attestation","attest_author":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7/action/author_attestation","sign_citation":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7/action/citation_signature","submit_replication":"https://pith.science/pith/HW4JON2WOPUYOXX3QPUELHHWK7/action/replication_record"}},"created_at":"2026-05-17T23:52:51.935035+00:00","updated_at":"2026-05-17T23:52:51.935035+00:00"}