{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:NZ3ZUXOS6KDXJ5JDWZX74Z4WXP","short_pith_number":"pith:NZ3ZUXOS","canonical_record":{"source":{"id":"2604.01604","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-02T04:28:11Z","cross_cats_sorted":[],"title_canon_sha256":"45d17b11646f671d7c6e7bd81a30930871ba7d5b351cb06a4cbe89ccc0b701b5","abstract_canon_sha256":"637e7802656323d90f74ea8cf1783cbe3d2e1fd960fc4e2a264416acbb3ac71c"},"schema_version":"1.0"},"canonical_sha256":"6e779a5dd2f28774f523b66ffe6796bbd70c53e5a398c9172af2216f32fd1084","source":{"kind":"arxiv","id":"2604.01604","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.01604","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.01604v2","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.01604","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_12","alias_value":"NZ3ZUXOS6KDX","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_16","alias_value":"NZ3ZUXOS6KDXJ5JD","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_8","alias_value":"NZ3ZUXOS","created_at":"2026-05-28T01:04:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:NZ3ZUXOS6KDXJ5JDWZX74Z4WXP","target":"record","payload":{"canonical_record":{"source":{"id":"2604.01604","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-02T04:28:11Z","cross_cats_sorted":[],"title_canon_sha256":"45d17b11646f671d7c6e7bd81a30930871ba7d5b351cb06a4cbe89ccc0b701b5","abstract_canon_sha256":"637e7802656323d90f74ea8cf1783cbe3d2e1fd960fc4e2a264416acbb3ac71c"},"schema_version":"1.0"},"canonical_sha256":"6e779a5dd2f28774f523b66ffe6796bbd70c53e5a398c9172af2216f32fd1084","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:39.301894Z","signature_b64":"ryXbmxkmR95880Vjh2ksPTNPiJphEomFowSUkI3Kie2B6iRGbEhUmwoIbXtrE4XPZGC9HOmFz02L0/kHFRlKAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6e779a5dd2f28774f523b66ffe6796bbd70c53e5a398c9172af2216f32fd1084","last_reissued_at":"2026-05-28T01:04:39.301381Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:39.301381Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.01604","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Erdy+vZm+tsgF0A+NmGV+0t2EloFw0wJl9+t6JzdTl+U2OqxetBzRMJzweWj4OADlEobiSJ00PSfUyCAXud2AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T23:01:32.221474Z"},"content_sha256":"becabc7c304d3a35e3d4ce930b33c5328e977c8f718db1aa1acb7522997ba394","schema_version":"1.0","event_id":"sha256:becabc7c304d3a35e3d4ce930b33c5328e977c8f718db1aa1acb7522997ba394"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:NZ3ZUXOS6KDXJ5JDWZX74Z4WXP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"CRaFT: Circuit-Guided Refusal Feature Selection via Cross-Layer Transcoders","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Hyundong Jin, Su-Hyeon Kim, Yejin Lee, Yo-Sub Han","submitted_at":"2026-04-02T04:28:11Z","abstract_excerpt":"While modern LLMs are aligned to refuse harmful requests, it is essential to understand the underlying mechanistic basis of this refusal behavior for model safety analysis. For example, steering-based jailbreak attacks exploit this by identifying and manipulating sparse, neuron-like refusal features to bypass safety guardrails. Current feature selection methods primarily rely on how strongly features activate on harmful prompts. However, activation strength alone often captures superficial heuristics such as topic or lexical cues, rather than the true causal mechanisms. Thus, selecting refusal"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.01604","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.01604/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T01:04:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3wodh2srrIVT2IrNjXXH7upx4HzkLKjOYUjAofm84WXDQC2xJeZTa6S9p8Fdfk4450/XpxldtdUJQlWDdaLeAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T23:01:32.221846Z"},"content_sha256":"a19e269b94457dd1bbd7319aa28840977f1d55b0fa597a9be126c16c1d63fd99","schema_version":"1.0","event_id":"sha256:a19e269b94457dd1bbd7319aa28840977f1d55b0fa597a9be126c16c1d63fd99"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NZ3ZUXOS6KDXJ5JDWZX74Z4WXP/bundle.json","state_url":"https://pith.science/pith/NZ3ZUXOS6KDXJ5JDWZX74Z4WXP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NZ3ZUXOS6KDXJ5JDWZX74Z4WXP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T23:01:32Z","links":{"resolver":"https://pith.science/pith/NZ3ZUXOS6KDXJ5JDWZX74Z4WXP","bundle":"https://pith.science/pith/NZ3ZUXOS6KDXJ5JDWZX74Z4WXP/bundle.json","state":"https://pith.science/pith/NZ3ZUXOS6KDXJ5JDWZX74Z4WXP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NZ3ZUXOS6KDXJ5JDWZX74Z4WXP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:NZ3ZUXOS6KDXJ5JDWZX74Z4WXP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"637e7802656323d90f74ea8cf1783cbe3d2e1fd960fc4e2a264416acbb3ac71c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-02T04:28:11Z","title_canon_sha256":"45d17b11646f671d7c6e7bd81a30930871ba7d5b351cb06a4cbe89ccc0b701b5"},"schema_version":"1.0","source":{"id":"2604.01604","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.01604","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.01604v2","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.01604","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_12","alias_value":"NZ3ZUXOS6KDX","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_16","alias_value":"NZ3ZUXOS6KDXJ5JD","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_8","alias_value":"NZ3ZUXOS","created_at":"2026-05-28T01:04:39Z"}],"graph_snapshots":[{"event_id":"sha256:a19e269b94457dd1bbd7319aa28840977f1d55b0fa597a9be126c16c1d63fd99","target":"graph","created_at":"2026-05-28T01:04:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.01604/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While modern LLMs are aligned to refuse harmful requests, it is essential to understand the underlying mechanistic basis of this refusal behavior for model safety analysis. For example, steering-based jailbreak attacks exploit this by identifying and manipulating sparse, neuron-like refusal features to bypass safety guardrails. Current feature selection methods primarily rely on how strongly features activate on harmful prompts. However, activation strength alone often captures superficial heuristics such as topic or lexical cues, rather than the true causal mechanisms. Thus, selecting refusal","authors_text":"Hyundong Jin, Su-Hyeon Kim, Yejin Lee, Yo-Sub Han","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-02T04:28:11Z","title":"CRaFT: Circuit-Guided Refusal Feature Selection via Cross-Layer Transcoders"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.01604","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:becabc7c304d3a35e3d4ce930b33c5328e977c8f718db1aa1acb7522997ba394","target":"record","created_at":"2026-05-28T01:04:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"637e7802656323d90f74ea8cf1783cbe3d2e1fd960fc4e2a264416acbb3ac71c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-02T04:28:11Z","title_canon_sha256":"45d17b11646f671d7c6e7bd81a30930871ba7d5b351cb06a4cbe89ccc0b701b5"},"schema_version":"1.0","source":{"id":"2604.01604","kind":"arxiv","version":2}},"canonical_sha256":"6e779a5dd2f28774f523b66ffe6796bbd70c53e5a398c9172af2216f32fd1084","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6e779a5dd2f28774f523b66ffe6796bbd70c53e5a398c9172af2216f32fd1084","first_computed_at":"2026-05-28T01:04:39.301381Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:39.301381Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ryXbmxkmR95880Vjh2ksPTNPiJphEomFowSUkI3Kie2B6iRGbEhUmwoIbXtrE4XPZGC9HOmFz02L0/kHFRlKAQ==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:39.301894Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.01604","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:becabc7c304d3a35e3d4ce930b33c5328e977c8f718db1aa1acb7522997ba394","sha256:a19e269b94457dd1bbd7319aa28840977f1d55b0fa597a9be126c16c1d63fd99"],"state_sha256":"56e4aa4f08595a1f67ee19b137b02008ba93d72552385780f88c98498d2b92f0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Cohib+hrZT/R9DXbVRwdGJFK0pIdzW2KXpoSdVVZEHIDPrPbV5v/Z4E6aPqV4LxGoAcBkfTGVNzJ47uWIJIGDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T23:01:32.223851Z","bundle_sha256":"c9874e1b65b948880d81616d47466062e910afeacea12164dcdca9d19647e601"}}