{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:WK5HZ5QFKHAVVOEDEY6FOAIWWJ","short_pith_number":"pith:WK5HZ5QF","schema_version":"1.0","canonical_sha256":"b2ba7cf60551c15ab883263c570116b26cfbb87b7f859c0d89a80f5638b59a73","source":{"kind":"arxiv","id":"2601.19921","version":2},"attestation_state":"computed","paper":{"title":"Demystifying Multi-Agent Debate: The Role of Confidence and Diversity","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Andreas Vlachos, Caiqi Zhang, Nigel Collier, Tom Stafford, Xiaochen Zhu, Yizhou Chi","submitted_at":"2026-01-09T02:38:30Z","abstract_excerpt":"Multi-agent debate (MAD) is widely used to improve large language model (LLM) performance through test-time scaling, yet recent work shows that vanilla MAD often underperforms simple majority vote despite higher computational cost. Studies show that, under homogeneous agents and uniform belief updates, debate preserves expected correctness and therefore cannot reliably improve outcomes. Drawing on findings from human deliberation and collective decision-making, we identify two key mechanisms missing from vanilla MAD: (i) diversity of initial viewpoints and (ii) explicit, calibrated confidence "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2601.19921","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-01-09T02:38:30Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a6d8131feaedfa39227e9afe089186f7f9cd049b370c3ccded2005db3871f3b8","abstract_canon_sha256":"274f73f2163f1b46998921186ae6cd33cff4f3babfd01b2ee93eca57c2bb9c23"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T03:04:37.273680Z","signature_b64":"QY8fIww96SpT+7ZyfRE1DkWd8lHH82TgQFvhm2MkjRNEXRWpH/zssGOFT6G8vTkkRwNA0MEatpiynDwknd3xDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b2ba7cf60551c15ab883263c570116b26cfbb87b7f859c0d89a80f5638b59a73","last_reissued_at":"2026-06-02T03:04:37.273136Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T03:04:37.273136Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Demystifying Multi-Agent Debate: The Role of Confidence and Diversity","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Andreas Vlachos, Caiqi Zhang, Nigel Collier, Tom Stafford, Xiaochen Zhu, Yizhou Chi","submitted_at":"2026-01-09T02:38:30Z","abstract_excerpt":"Multi-agent debate (MAD) is widely used to improve large language model (LLM) performance through test-time scaling, yet recent work shows that vanilla MAD often underperforms simple majority vote despite higher computational cost. Studies show that, under homogeneous agents and uniform belief updates, debate preserves expected correctness and therefore cannot reliably improve outcomes. Drawing on findings from human deliberation and collective decision-making, we identify two key mechanisms missing from vanilla MAD: (i) diversity of initial viewpoints and (ii) explicit, calibrated confidence "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.19921","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.19921/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2601.19921","created_at":"2026-06-02T03:04:37.273206+00:00"},{"alias_kind":"arxiv_version","alias_value":"2601.19921v2","created_at":"2026-06-02T03:04:37.273206+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.19921","created_at":"2026-06-02T03:04:37.273206+00:00"},{"alias_kind":"pith_short_12","alias_value":"WK5HZ5QFKHAV","created_at":"2026-06-02T03:04:37.273206+00:00"},{"alias_kind":"pith_short_16","alias_value":"WK5HZ5QFKHAVVOED","created_at":"2026-06-02T03:04:37.273206+00:00"},{"alias_kind":"pith_short_8","alias_value":"WK5HZ5QF","created_at":"2026-06-02T03:04:37.273206+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2605.23099","citing_title":"SVR-MAD: A Bayesian-Inspired Framework for Posterior-Guided Multi-Agent Debate","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2603.27771","citing_title":"Emergent Social Intelligence Risks in Generative Multi-Agent Systems","ref_index":144,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01704","citing_title":"The Reasoning Trap: An Information-Theoretic Bound on Closed-System Multi-Step LLM Reasoning","ref_index":16,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ","json":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ.json","graph_json":"https://pith.science/api/pith-number/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/graph.json","events_json":"https://pith.science/api/pith-number/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/events.json","paper":"https://pith.science/paper/WK5HZ5QF"},"agent_actions":{"view_html":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ","download_json":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ.json","view_paper":"https://pith.science/paper/WK5HZ5QF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2601.19921&json=true","fetch_graph":"https://pith.science/api/pith-number/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/graph.json","fetch_events":"https://pith.science/api/pith-number/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/action/storage_attestation","attest_author":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/action/author_attestation","sign_citation":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/action/citation_signature","submit_replication":"https://pith.science/pith/WK5HZ5QFKHAVVOEDEY6FOAIWWJ/action/replication_record"}},"created_at":"2026-06-02T03:04:37.273206+00:00","updated_at":"2026-06-02T03:04:37.273206+00:00"}