{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:EA54DF5YPRNI3UHRFEJWUGWI46","short_pith_number":"pith:EA54DF5Y","schema_version":"1.0","canonical_sha256":"203bc197b87c5a8dd0f129136a1ac8e79ecd09496d518900247482745ef8c813","source":{"kind":"arxiv","id":"2410.15236","version":4},"attestation_state":"computed","paper":{"title":"Jailbreaking and Mitigation of Vulnerabilities in Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CR","authors_text":"Benji Peng, Caitlyn Heqi Yin, Hanxuan Chen, Jiacheng Shi, Keyu Chen, Lawrence K.Q. Yan, Ming Liu, Pohsun Feng, Qian Niu, Riyang Bao, Tianyang Wang, Xinyuan Song, Yichao Zhang, Yizhu Wen, Ziqian Bi","submitted_at":"2024-10-20T00:00:56Z","abstract_excerpt":"Large Language Models (LLMs) have transformed artificial intelligence by advancing natural language understanding and generation, enabling applications across fields beyond healthcare, software engineering, and conversational systems. Despite these advancements in the past few years, LLMs have shown considerable vulnerabilities, particularly to prompt injection and jailbreaking attacks. This review analyzes the state of research on these vulnerabilities and presents available defense strategies. We roughly categorize attack approaches into prompt-based, model-based, multimodal, and multilingua"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2410.15236","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2024-10-20T00:00:56Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"37db135a3f03533ef0eb26641135e9346b3ead7df6bb07b801f8bbd4c0b4132d","abstract_canon_sha256":"6af61dd759e3ecedb66cdf5c91a433db7f65384effae5562a4e00f436a94d139"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T00:04:12.336399Z","signature_b64":"GZdotHQLtBeYodjGHeEU7GTBTDIpQi5CxlZZDUjxPMhZBocFEnDrHZaVtupuzlTwRpVkr7QuV6JBSNOVzP5RCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"203bc197b87c5a8dd0f129136a1ac8e79ecd09496d518900247482745ef8c813","last_reissued_at":"2026-05-29T00:04:12.335728Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T00:04:12.335728Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Jailbreaking and Mitigation of Vulnerabilities in Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CR","authors_text":"Benji Peng, Caitlyn Heqi Yin, Hanxuan Chen, Jiacheng Shi, Keyu Chen, Lawrence K.Q. Yan, Ming Liu, Pohsun Feng, Qian Niu, Riyang Bao, Tianyang Wang, Xinyuan Song, Yichao Zhang, Yizhu Wen, Ziqian Bi","submitted_at":"2024-10-20T00:00:56Z","abstract_excerpt":"Large Language Models (LLMs) have transformed artificial intelligence by advancing natural language understanding and generation, enabling applications across fields beyond healthcare, software engineering, and conversational systems. Despite these advancements in the past few years, LLMs have shown considerable vulnerabilities, particularly to prompt injection and jailbreaking attacks. This review analyzes the state of research on these vulnerabilities and presents available defense strategies. We roughly categorize attack approaches into prompt-based, model-based, multimodal, and multilingua"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2410.15236","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2410.15236/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2410.15236","created_at":"2026-05-29T00:04:12.335830+00:00"},{"alias_kind":"arxiv_version","alias_value":"2410.15236v4","created_at":"2026-05-29T00:04:12.335830+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.15236","created_at":"2026-05-29T00:04:12.335830+00:00"},{"alias_kind":"pith_short_12","alias_value":"EA54DF5YPRNI","created_at":"2026-05-29T00:04:12.335830+00:00"},{"alias_kind":"pith_short_16","alias_value":"EA54DF5YPRNI3UHR","created_at":"2026-05-29T00:04:12.335830+00:00"},{"alias_kind":"pith_short_8","alias_value":"EA54DF5Y","created_at":"2026-05-29T00:04:12.335830+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":8,"sample":[{"citing_arxiv_id":"2409.18169","citing_title":"Harmful Fine-tuning Attacks and Defenses for Large Language Models: A Survey","ref_index":115,"is_internal_anchor":true},{"citing_arxiv_id":"2501.18416","citing_title":"Exploring Potential Prompt Injection Attacks in Federated Military LLMs and Their Mitigation","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18239","citing_title":"Multilingual jailbreaking of LLMs using low-resource languages","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15503","citing_title":"uGen: An Agentic Framework for Generating Microarchitectural Attack PoCs","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2510.22628","citing_title":"Sentra-Guard: A Real-Time Multilingual Defense Against Adversarial LLM Prompts","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05058","citing_title":"SoK: Robustness in Large Language Models against Jailbreak Attacks","ref_index":61,"is_internal_anchor":true},{"citing_arxiv_id":"2604.12168","citing_title":"Fully Homomorphic Encryption on Llama 3 model for privacy preserving LLM inference","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18775","citing_title":"An Empirical Study of Multi-Generation Sampling for Jailbreak Detection in Large Language Models","ref_index":9,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46","json":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46.json","graph_json":"https://pith.science/api/pith-number/EA54DF5YPRNI3UHRFEJWUGWI46/graph.json","events_json":"https://pith.science/api/pith-number/EA54DF5YPRNI3UHRFEJWUGWI46/events.json","paper":"https://pith.science/paper/EA54DF5Y"},"agent_actions":{"view_html":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46","download_json":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46.json","view_paper":"https://pith.science/paper/EA54DF5Y","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2410.15236&json=true","fetch_graph":"https://pith.science/api/pith-number/EA54DF5YPRNI3UHRFEJWUGWI46/graph.json","fetch_events":"https://pith.science/api/pith-number/EA54DF5YPRNI3UHRFEJWUGWI46/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46/action/timestamp_anchor","attest_storage":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46/action/storage_attestation","attest_author":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46/action/author_attestation","sign_citation":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46/action/citation_signature","submit_replication":"https://pith.science/pith/EA54DF5YPRNI3UHRFEJWUGWI46/action/replication_record"}},"created_at":"2026-05-29T00:04:12.335830+00:00","updated_at":"2026-05-29T00:04:12.335830+00:00"}