{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:6B2TDGGXZGX7EA5TUHYKINQKJ5","short_pith_number":"pith:6B2TDGGX","schema_version":"1.0","canonical_sha256":"f0753198d7c9aff203b3a1f0a4360a4f7f06a46fbd04855d9e9c5830206de12f","source":{"kind":"arxiv","id":"2602.06911","version":2},"attestation_state":"computed","paper":{"title":"TamperBench: Systematically Stress-Testing LLM Safety Under Fine-Tuning and Tampering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Kellin Pelrine, Matthew Kowal, Nayeema Nonta, Punya Syon Pandey, Saad Hossain, Samanvay Vajpayee, Samuel Simko, Sirisha Rambhatla, Stephen Casper, Tom Tseng, Zhijing Jin","submitted_at":"2026-02-06T18:04:38Z","abstract_excerpt":"As increasingly capable open-weight large language models (LLMs) are deployed, improving their tamper resistance against unsafe modifications, whether accidental or intentional, becomes critical to minimize risks. However, there is no standard approach to evaluate tamper resistance. Varied datasets, metrics, and tampering configurations make it difficult to compare safety, utility, and robustness across different models and defenses. To address this, we introduce TamperBench, the first unified framework to systematically evaluate the tamper resistance of LLMs. TamperBench (i) curates a reposit"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.06911","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-02-06T18:04:38Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"8d0a491832e4ad40d573beb91435981c2fb3d50f36285267b8f0a17a462b7db3","abstract_canon_sha256":"57fbf068fbf9b7ca11abd9c2303e7829ac31dc91db073e4c0473960ddf51aa67"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-04T01:08:41.500731Z","signature_b64":"WLrKbwy6fdTiBoCBf1b7pHAri6OaMNimpwMN23bYCKhytsjS/NVzbvHmsAtaMHCD45nxYdt3ptFpO96clpe2CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f0753198d7c9aff203b3a1f0a4360a4f7f06a46fbd04855d9e9c5830206de12f","last_reissued_at":"2026-06-04T01:08:41.499998Z","signature_status":"signed_v1","first_computed_at":"2026-06-04T01:08:41.499998Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TamperBench: Systematically Stress-Testing LLM Safety Under Fine-Tuning and Tampering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Kellin Pelrine, Matthew Kowal, Nayeema Nonta, Punya Syon Pandey, Saad Hossain, Samanvay Vajpayee, Samuel Simko, Sirisha Rambhatla, Stephen Casper, Tom Tseng, Zhijing Jin","submitted_at":"2026-02-06T18:04:38Z","abstract_excerpt":"As increasingly capable open-weight large language models (LLMs) are deployed, improving their tamper resistance against unsafe modifications, whether accidental or intentional, becomes critical to minimize risks. However, there is no standard approach to evaluate tamper resistance. Varied datasets, metrics, and tampering configurations make it difficult to compare safety, utility, and robustness across different models and defenses. To address this, we introduce TamperBench, the first unified framework to systematically evaluate the tamper resistance of LLMs. TamperBench (i) curates a reposit"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.06911","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.06911/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.06911","created_at":"2026-06-04T01:08:41.500074+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.06911v2","created_at":"2026-06-04T01:08:41.500074+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.06911","created_at":"2026-06-04T01:08:41.500074+00:00"},{"alias_kind":"pith_short_12","alias_value":"6B2TDGGXZGX7","created_at":"2026-06-04T01:08:41.500074+00:00"},{"alias_kind":"pith_short_16","alias_value":"6B2TDGGXZGX7EA5T","created_at":"2026-06-04T01:08:41.500074+00:00"},{"alias_kind":"pith_short_8","alias_value":"6B2TDGGX","created_at":"2026-06-04T01:08:41.500074+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5","json":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5.json","graph_json":"https://pith.science/api/pith-number/6B2TDGGXZGX7EA5TUHYKINQKJ5/graph.json","events_json":"https://pith.science/api/pith-number/6B2TDGGXZGX7EA5TUHYKINQKJ5/events.json","paper":"https://pith.science/paper/6B2TDGGX"},"agent_actions":{"view_html":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5","download_json":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5.json","view_paper":"https://pith.science/paper/6B2TDGGX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.06911&json=true","fetch_graph":"https://pith.science/api/pith-number/6B2TDGGXZGX7EA5TUHYKINQKJ5/graph.json","fetch_events":"https://pith.science/api/pith-number/6B2TDGGXZGX7EA5TUHYKINQKJ5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5/action/storage_attestation","attest_author":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5/action/author_attestation","sign_citation":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5/action/citation_signature","submit_replication":"https://pith.science/pith/6B2TDGGXZGX7EA5TUHYKINQKJ5/action/replication_record"}},"created_at":"2026-06-04T01:08:41.500074+00:00","updated_at":"2026-06-04T01:08:41.500074+00:00"}