{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:AMZB2PK2OIIHM4LVTRFSKH5RSI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"12331aa25501eb215b232c8242d7d5d268c34924e775e461378931445601599e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-01-27T20:57:18Z","title_canon_sha256":"b9c6e9cb0c692d000881d31e45732bf54e9bca012ee1962914207934fab15ba2"},"schema_version":"1.0","source":{"id":"2501.16496","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.16496","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"arxiv_version","alias_value":"2501.16496v1","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.16496","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"pith_short_12","alias_value":"AMZB2PK2OIIH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"AMZB2PK2OIIHM4LV","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"AMZB2PK2","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:b296275ad05d05989de0b9067b0e510dfcbb13b21b84c0e5717988190f0626b3","target":"graph","created_at":"2026-05-17T23:39:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Progress in mechanistic interpretability promises greater assurance over AI system behavior and shed light on exciting scientific questions about the nature of intelligence, but many open problems require solutions before these benefits can be realized."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That solving the identified open problems in methods, applications, and socio-technical challenges will directly produce the promised scientific and engineering benefits."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A review paper that organizes conceptual, practical, and socio-technical open problems in mechanistic interpretability."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Mechanistic interpretability must solve open problems in methods, applications, and socio-technical challenges to achieve its goals of AI assurance and scientific insight."}],"snapshot_sha256":"7d4dfb5b0b1d119ebde1dcc51640c47d8a89f05bef124f224d3053073a780db3"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"efb565fc2dbc6f1aa1e1681ca94a0bd28653d2101d76a266eac508169ae5c88f"},"paper":{"abstract_excerpt":"Mechanistic interpretability aims to understand the computational mechanisms underlying neural networks' capabilities in order to accomplish concrete scientific and engineering goals. Progress in this field thus promises to provide greater assurance over AI system behavior and shed light on exciting scientific questions about the nature of intelligence. Despite recent progress toward these goals, there are many open problems in the field that require solutions before many scientific and practical benefits can be realized: Our methods require both conceptual and practical improvements to reveal","authors_text":"Adria Garriga-Alonso, Alejandro Ortega, Arthur Conmy, Atticus Geiger, Bilal Chughtai, Daniel Murfet, David Bau, Eric J. Michaud, Eric Todd, Jack Lindsey, Jeff Wu, Jesse Hoogland, Jessica Rumbelow, Joseph Bloom, Joseph Miller, Joshua Batson, Lee Sharkey, Lucius Bushnaq, Martin Wattenberg, Max Tegmark, Mor Geva, Nandi Schoots, Neel Nanda, Nicholas Goldowsky-Dill, Stefan Heimersheim, Stella Biderman, Stephen Casper, Tom McGrath, William Saunders","cross_cats":[],"headline":"Mechanistic interpretability must solve open problems in methods, applications, and socio-technical challenges to achieve its goals of AI assurance and scientific insight.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-01-27T20:57:18Z","title":"Open Problems in Mechanistic Interpretability"},"references":{"count":77,"internal_anchors":3,"resolved_work":77,"sample":[{"cited_arxiv_id":"","doi":"10.1073/pnas.1907375117","is_internal_anchor":false,"ref_index":1,"title":"Understanding the role of individual units in a deep neural network","work_id":"6b96f855-8b1d-4fc1-8171-8e1a6a16fea9","year":2024},{"cited_arxiv_id":"","doi":"10.23915/distill.00015","is_internal_anchor":false,"ref_index":2,"title":"https://distill.pub/2019/activation-atlas","work_id":"aa26f856-6ec2-4377-9256-be457f8d0629","year":2019},{"cited_arxiv_id":"","doi":"10.1162/tacl_a_00359","is_internal_anchor":false,"ref_index":3,"title":"Amnesic Probing: Behavioral Explanation with Amnesic Counterfactuals","work_id":"c60c9b79-a372-4ddc-a28f-4c6fa9d62204","year":2023},{"cited_arxiv_id":"","doi":"10.18653/v1/w16-2524","is_internal_anchor":false,"ref_index":4,"title":"Probing for semantic evidence of composition by means of simple classification tasks","work_id":"48f79331-577a-482b-b14d-fcad29a5a94c","year":2009},{"cited_arxiv_id":"","doi":"10.1145/3531146.3533074","is_internal_anchor":false,"ref_index":5,"title":"ISBN 9781450393522","work_id":"1bb029ce-48e8-4920-9a76-a9d1d929a6b7","year":2024}],"snapshot_sha256":"2d3b5cdfb4ce25045e3a3ba4b74499d79e9c838326bb6ce0bdfa813f196ef95e"},"source":{"id":"2501.16496","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T18:25:38.231306Z","id":"0af7e63b-80cf-417c-acdb-5819e542a978","model_set":{"reader":"grok-4.3"},"one_line_summary":"A review paper that organizes conceptual, practical, and socio-technical open problems in mechanistic interpretability.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Mechanistic interpretability must solve open problems in methods, applications, and socio-technical challenges to achieve its goals of AI assurance and scientific insight.","strongest_claim":"Progress in mechanistic interpretability promises greater assurance over AI system behavior and shed light on exciting scientific questions about the nature of intelligence, but many open problems require solutions before these benefits can be realized.","weakest_assumption":"That solving the identified open problems in methods, applications, and socio-technical challenges will directly produce the promised scientific and engineering benefits."}},"verdict_id":"0af7e63b-80cf-417c-acdb-5819e542a978"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f40a6cd406c6a85e01238e2dc7275052c3ed29974478a398380a5928151d7e55","target":"record","created_at":"2026-05-17T23:39:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"12331aa25501eb215b232c8242d7d5d268c34924e775e461378931445601599e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-01-27T20:57:18Z","title_canon_sha256":"b9c6e9cb0c692d000881d31e45732bf54e9bca012ee1962914207934fab15ba2"},"schema_version":"1.0","source":{"id":"2501.16496","kind":"arxiv","version":1}},"canonical_sha256":"03321d3d5a72107671759c4b251fb1922e88b10314cc0fd577a0fc72e6fa437b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"03321d3d5a72107671759c4b251fb1922e88b10314cc0fd577a0fc72e6fa437b","first_computed_at":"2026-05-17T23:39:22.194980Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:22.194980Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"coSWZXoJUDi9/bK3ewN8uUyIBemlgl/cTNgZfN/16M0Eu27jya2/LqaQucYE5ALIfGLwBKmMXIAxzAsT3GQwBA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:22.195798Z","signed_message":"canonical_sha256_bytes"},"source_id":"2501.16496","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f40a6cd406c6a85e01238e2dc7275052c3ed29974478a398380a5928151d7e55","sha256:b296275ad05d05989de0b9067b0e510dfcbb13b21b84c0e5717988190f0626b3"],"state_sha256":"12e5d4895cbe199d1f9f582827284fae3ccc4ebdf134fba83b51168772576b62"}