{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:IF4G3HBKUCWP3KLIGOLWGIE5XE","short_pith_number":"pith:IF4G3HBK","canonical_record":{"source":{"id":"2605.01133","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-01T22:15:11Z","cross_cats_sorted":["cs.LG","cs.MA"],"title_canon_sha256":"1d8a766fb924ff97a892827ec97621ba6e97335a7d533024b79aa555ac6c5c46","abstract_canon_sha256":"0272b13a1dd78c81064a1c140c14bc0dac2f299b4e22448b4db55ff8e20058a7"},"schema_version":"1.0"},"canonical_sha256":"41786d9c2aa0acfda968339763209db930962be7810cd39b2b9dd216bd656cd0","source":{"kind":"arxiv","id":"2605.01133","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.01133","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"arxiv_version","alias_value":"2605.01133v2","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.01133","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"pith_short_12","alias_value":"IF4G3HBKUCWP","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"pith_short_16","alias_value":"IF4G3HBKUCWP3KLI","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"pith_short_8","alias_value":"IF4G3HBK","created_at":"2026-06-23T01:13:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:IF4G3HBKUCWP3KLIGOLWGIE5XE","target":"record","payload":{"canonical_record":{"source":{"id":"2605.01133","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-01T22:15:11Z","cross_cats_sorted":["cs.LG","cs.MA"],"title_canon_sha256":"1d8a766fb924ff97a892827ec97621ba6e97335a7d533024b79aa555ac6c5c46","abstract_canon_sha256":"0272b13a1dd78c81064a1c140c14bc0dac2f299b4e22448b4db55ff8e20058a7"},"schema_version":"1.0"},"canonical_sha256":"41786d9c2aa0acfda968339763209db930962be7810cd39b2b9dd216bd656cd0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:13:05.319426Z","signature_b64":"5LGL4pcqf40iJ4a4DSuHW7LE5GcYKIuaKwmd/rBRNNTjRxmh+Fy/xzxHh/A0gsMzlcbJBBr1JOH85MSy4mLmBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"41786d9c2aa0acfda968339763209db930962be7810cd39b2b9dd216bd656cd0","last_reissued_at":"2026-06-23T01:13:05.318866Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:13:05.318866Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.01133","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:13:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cTgaSxtZ6Nlirkyv6DwhEUBDJSEpF5ZSOjHhFcl6FvH+0O5121t1gVe+LNQtpwmNWotKQ6PksNU6tkgYKi/bBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T20:29:50.385711Z"},"content_sha256":"b62066b908fc45069c78e1010bdce4ad1ef67c60ed6f3bc700606d5e249a0a3e","schema_version":"1.0","event_id":"sha256:b62066b908fc45069c78e1010bdce4ad1ef67c60ed6f3bc700606d5e249a0a3e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:IF4G3HBKUCWP3KLIGOLWGIE5XE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"When Embedding-Based Defenses Fail: Rethinking Safety in LLM-Based Multi-Agent Systems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Embedding-based defenses in LLM multi-agent systems fail when attackers craft messages whose embeddings lie close to benign ones, but token confidence scores provide a workable alternative for pruning suspicious messages.","cross_cats":["cs.LG","cs.MA"],"primary_cat":"cs.CR","authors_text":"Guangtao Zheng, Hanjie Chen, Lingxi Zhang","submitted_at":"2026-05-01T22:15:11Z","abstract_excerpt":"Large language model (LLM)-powered multi-agent systems (MAS) enable agents to communicate and share information, achieving strong performance on complex tasks. However, this communication also creates an attack surface where malicious agents can propagate misinformation and manipulate group decisions, undermining MAS safety. Existing embedding-based defenses aim to detect and prune suspicious agents, but their effectiveness depends on a clear separation between the text embeddings of malicious and benign messages. Attackers can circumvent such defenses by crafting messages whose embeddings lie"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Existing embedding-based defenses aim to detect and prune suspicious agents, but their effectiveness depends on a clear separation between the text embeddings of malicious and benign messages. Attackers can circumvent such defenses by crafting messages whose embeddings lie close to benign ones. We propose using confidence scores to prune or down-weight messages during MAS communication. Experiments show improved robustness across models, datasets, and communication topologies.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That token-level confidence signals such as logits remain informative and separable when text embeddings are no longer distinguishable under the proposed attacks.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Embedding-based defenses fail against attacks that align malicious message embeddings with benign ones in LLM multi-agent systems, but token-level confidence scores improve robustness by enabling better pruning of suspicious messages.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Embedding-based defenses in LLM multi-agent systems fail when attackers craft messages whose embeddings lie close to benign ones, but token confidence scores provide a workable alternative for pruning suspicious messages.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"5333ff60ca9523d8de4dc748f485ec27e38b9d4c8bbf80e8ad30871e28c2fedf"},"source":{"id":"2605.01133","kind":"arxiv","version":2},"verdict":{"id":"9135447b-3cda-4ab7-bdb4-be0e30eef9a8","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-09T18:31:18.443207Z","strongest_claim":"Existing embedding-based defenses aim to detect and prune suspicious agents, but their effectiveness depends on a clear separation between the text embeddings of malicious and benign messages. Attackers can circumvent such defenses by crafting messages whose embeddings lie close to benign ones. We propose using confidence scores to prune or down-weight messages during MAS communication. Experiments show improved robustness across models, datasets, and communication topologies.","one_line_summary":"Embedding-based defenses fail against attacks that align malicious message embeddings with benign ones in LLM multi-agent systems, but token-level confidence scores improve robustness by enabling better pruning of suspicious messages.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That token-level confidence signals such as logits remain informative and separable when text embeddings are no longer distinguishable under the proposed attacks.","pith_extraction_headline":"Embedding-based defenses in LLM multi-agent systems fail when attackers craft messages whose embeddings lie close to benign ones, but token confidence scores provide a workable alternative for pruning suspicious messages."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.01133/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-20T18:38:20.168329Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T17:32:37.297157Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"ff91b0fff7c6e47c8dc4d93df8fb3bcaba6b04a45ac6cf0a180332e8796f0070"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"9135447b-3cda-4ab7-bdb4-be0e30eef9a8"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:13:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mg8SkyI00wvZ8l37H79o8RIRLk2xFrM4oKCCEJ2lHgoTIHqwtpp4nN0VpKkEAWFcMb8PaLgdyHTfEVZ9rEGICw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T20:29:50.386226Z"},"content_sha256":"414114b0081c27ef942ec23a183b6c182f051f273bebaf12070aad8035eb80a3","schema_version":"1.0","event_id":"sha256:414114b0081c27ef942ec23a183b6c182f051f273bebaf12070aad8035eb80a3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IF4G3HBKUCWP3KLIGOLWGIE5XE/bundle.json","state_url":"https://pith.science/pith/IF4G3HBKUCWP3KLIGOLWGIE5XE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IF4G3HBKUCWP3KLIGOLWGIE5XE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T20:29:50Z","links":{"resolver":"https://pith.science/pith/IF4G3HBKUCWP3KLIGOLWGIE5XE","bundle":"https://pith.science/pith/IF4G3HBKUCWP3KLIGOLWGIE5XE/bundle.json","state":"https://pith.science/pith/IF4G3HBKUCWP3KLIGOLWGIE5XE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IF4G3HBKUCWP3KLIGOLWGIE5XE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:IF4G3HBKUCWP3KLIGOLWGIE5XE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0272b13a1dd78c81064a1c140c14bc0dac2f299b4e22448b4db55ff8e20058a7","cross_cats_sorted":["cs.LG","cs.MA"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-01T22:15:11Z","title_canon_sha256":"1d8a766fb924ff97a892827ec97621ba6e97335a7d533024b79aa555ac6c5c46"},"schema_version":"1.0","source":{"id":"2605.01133","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.01133","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"arxiv_version","alias_value":"2605.01133v2","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.01133","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"pith_short_12","alias_value":"IF4G3HBKUCWP","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"pith_short_16","alias_value":"IF4G3HBKUCWP3KLI","created_at":"2026-06-23T01:13:05Z"},{"alias_kind":"pith_short_8","alias_value":"IF4G3HBK","created_at":"2026-06-23T01:13:05Z"}],"graph_snapshots":[{"event_id":"sha256:414114b0081c27ef942ec23a183b6c182f051f273bebaf12070aad8035eb80a3","target":"graph","created_at":"2026-06-23T01:13:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Existing embedding-based defenses aim to detect and prune suspicious agents, but their effectiveness depends on a clear separation between the text embeddings of malicious and benign messages. Attackers can circumvent such defenses by crafting messages whose embeddings lie close to benign ones. We propose using confidence scores to prune or down-weight messages during MAS communication. Experiments show improved robustness across models, datasets, and communication topologies."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That token-level confidence signals such as logits remain informative and separable when text embeddings are no longer distinguishable under the proposed attacks."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Embedding-based defenses fail against attacks that align malicious message embeddings with benign ones in LLM multi-agent systems, but token-level confidence scores improve robustness by enabling better pruning of suspicious messages."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Embedding-based defenses in LLM multi-agent systems fail when attackers craft messages whose embeddings lie close to benign ones, but token confidence scores provide a workable alternative for pruning suspicious messages."}],"snapshot_sha256":"5333ff60ca9523d8de4dc748f485ec27e38b9d4c8bbf80e8ad30871e28c2fedf"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-20T18:38:20.168329Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T17:32:37.297157Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.01133/integrity.json","findings":[],"snapshot_sha256":"ff91b0fff7c6e47c8dc4d93df8fb3bcaba6b04a45ac6cf0a180332e8796f0070","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language model (LLM)-powered multi-agent systems (MAS) enable agents to communicate and share information, achieving strong performance on complex tasks. However, this communication also creates an attack surface where malicious agents can propagate misinformation and manipulate group decisions, undermining MAS safety. Existing embedding-based defenses aim to detect and prune suspicious agents, but their effectiveness depends on a clear separation between the text embeddings of malicious and benign messages. Attackers can circumvent such defenses by crafting messages whose embeddings lie","authors_text":"Guangtao Zheng, Hanjie Chen, Lingxi Zhang","cross_cats":["cs.LG","cs.MA"],"headline":"Embedding-based defenses in LLM multi-agent systems fail when attackers craft messages whose embeddings lie close to benign ones, but token confidence scores provide a workable alternative for pruning suspicious messages.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-01T22:15:11Z","title":"When Embedding-Based Defenses Fail: Rethinking Safety in LLM-Based Multi-Agent Systems"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.01133","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-09T18:31:18.443207Z","id":"9135447b-3cda-4ab7-bdb4-be0e30eef9a8","model_set":{"reader":"grok-4.3"},"one_line_summary":"Embedding-based defenses fail against attacks that align malicious message embeddings with benign ones in LLM multi-agent systems, but token-level confidence scores improve robustness by enabling better pruning of suspicious messages.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Embedding-based defenses in LLM multi-agent systems fail when attackers craft messages whose embeddings lie close to benign ones, but token confidence scores provide a workable alternative for pruning suspicious messages.","strongest_claim":"Existing embedding-based defenses aim to detect and prune suspicious agents, but their effectiveness depends on a clear separation between the text embeddings of malicious and benign messages. Attackers can circumvent such defenses by crafting messages whose embeddings lie close to benign ones. We propose using confidence scores to prune or down-weight messages during MAS communication. Experiments show improved robustness across models, datasets, and communication topologies.","weakest_assumption":"That token-level confidence signals such as logits remain informative and separable when text embeddings are no longer distinguishable under the proposed attacks."}},"verdict_id":"9135447b-3cda-4ab7-bdb4-be0e30eef9a8"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b62066b908fc45069c78e1010bdce4ad1ef67c60ed6f3bc700606d5e249a0a3e","target":"record","created_at":"2026-06-23T01:13:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0272b13a1dd78c81064a1c140c14bc0dac2f299b4e22448b4db55ff8e20058a7","cross_cats_sorted":["cs.LG","cs.MA"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-05-01T22:15:11Z","title_canon_sha256":"1d8a766fb924ff97a892827ec97621ba6e97335a7d533024b79aa555ac6c5c46"},"schema_version":"1.0","source":{"id":"2605.01133","kind":"arxiv","version":2}},"canonical_sha256":"41786d9c2aa0acfda968339763209db930962be7810cd39b2b9dd216bd656cd0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"41786d9c2aa0acfda968339763209db930962be7810cd39b2b9dd216bd656cd0","first_computed_at":"2026-06-23T01:13:05.318866Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T01:13:05.318866Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5LGL4pcqf40iJ4a4DSuHW7LE5GcYKIuaKwmd/rBRNNTjRxmh+Fy/xzxHh/A0gsMzlcbJBBr1JOH85MSy4mLmBA==","signature_status":"signed_v1","signed_at":"2026-06-23T01:13:05.319426Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.01133","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b62066b908fc45069c78e1010bdce4ad1ef67c60ed6f3bc700606d5e249a0a3e","sha256:414114b0081c27ef942ec23a183b6c182f051f273bebaf12070aad8035eb80a3"],"state_sha256":"b5bd57a5655ed508c978c57efbbf817504deb7e6ecba8a45697aff1a7c98e7d0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QbozHN6rTHpqk7RjXuAcgKqLWD4FaPMOlGlXaqHdjurWC0mFV1yvu3nB0qPc7aEd8BbgUek2JPzLogDAHyeNAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T20:29:50.388730Z","bundle_sha256":"97e5ef6b796ee34f7db880d48f5cce8aafd3535c528ce341c9de9bae29f84c7a"}}