{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:NZ3ZUXOS6KDXJ5JDWZX74Z4WXP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"637e7802656323d90f74ea8cf1783cbe3d2e1fd960fc4e2a264416acbb3ac71c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-02T04:28:11Z","title_canon_sha256":"45d17b11646f671d7c6e7bd81a30930871ba7d5b351cb06a4cbe89ccc0b701b5"},"schema_version":"1.0","source":{"id":"2604.01604","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.01604","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"arxiv_version","alias_value":"2604.01604v2","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.01604","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_12","alias_value":"NZ3ZUXOS6KDX","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_16","alias_value":"NZ3ZUXOS6KDXJ5JD","created_at":"2026-05-28T01:04:39Z"},{"alias_kind":"pith_short_8","alias_value":"NZ3ZUXOS","created_at":"2026-05-28T01:04:39Z"}],"graph_snapshots":[{"event_id":"sha256:a19e269b94457dd1bbd7319aa28840977f1d55b0fa597a9be126c16c1d63fd99","target":"graph","created_at":"2026-05-28T01:04:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.01604/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While modern LLMs are aligned to refuse harmful requests, it is essential to understand the underlying mechanistic basis of this refusal behavior for model safety analysis. For example, steering-based jailbreak attacks exploit this by identifying and manipulating sparse, neuron-like refusal features to bypass safety guardrails. Current feature selection methods primarily rely on how strongly features activate on harmful prompts. However, activation strength alone often captures superficial heuristics such as topic or lexical cues, rather than the true causal mechanisms. Thus, selecting refusal","authors_text":"Hyundong Jin, Su-Hyeon Kim, Yejin Lee, Yo-Sub Han","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-02T04:28:11Z","title":"CRaFT: Circuit-Guided Refusal Feature Selection via Cross-Layer Transcoders"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.01604","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:becabc7c304d3a35e3d4ce930b33c5328e977c8f718db1aa1acb7522997ba394","target":"record","created_at":"2026-05-28T01:04:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"637e7802656323d90f74ea8cf1783cbe3d2e1fd960fc4e2a264416acbb3ac71c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-02T04:28:11Z","title_canon_sha256":"45d17b11646f671d7c6e7bd81a30930871ba7d5b351cb06a4cbe89ccc0b701b5"},"schema_version":"1.0","source":{"id":"2604.01604","kind":"arxiv","version":2}},"canonical_sha256":"6e779a5dd2f28774f523b66ffe6796bbd70c53e5a398c9172af2216f32fd1084","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6e779a5dd2f28774f523b66ffe6796bbd70c53e5a398c9172af2216f32fd1084","first_computed_at":"2026-05-28T01:04:39.301381Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:04:39.301381Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ryXbmxkmR95880Vjh2ksPTNPiJphEomFowSUkI3Kie2B6iRGbEhUmwoIbXtrE4XPZGC9HOmFz02L0/kHFRlKAQ==","signature_status":"signed_v1","signed_at":"2026-05-28T01:04:39.301894Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.01604","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:becabc7c304d3a35e3d4ce930b33c5328e977c8f718db1aa1acb7522997ba394","sha256:a19e269b94457dd1bbd7319aa28840977f1d55b0fa597a9be126c16c1d63fd99"],"state_sha256":"56e4aa4f08595a1f67ee19b137b02008ba93d72552385780f88c98498d2b92f0"}