{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:IJIS7JF7KANJSIA7E2LZLYDQRJ","short_pith_number":"pith:IJIS7JF7","canonical_record":{"source":{"id":"2606.32038","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-30T17:59:32Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"f5cd3d38c14be2ac271daa40f8c7c712f859eead97b56826b54ef4f9a316b4b1","abstract_canon_sha256":"c9930e941d73e7c336a5e6edcf0c293963c26d9703ca69251b524fc205169d08"},"schema_version":"1.0"},"canonical_sha256":"42512fa4bf501a99201f269795e0708a42f054cf2340e532911e95449e652531","source":{"kind":"arxiv","id":"2606.32038","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.32038","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"arxiv_version","alias_value":"2606.32038v1","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.32038","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"pith_short_12","alias_value":"IJIS7JF7KANJ","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"pith_short_16","alias_value":"IJIS7JF7KANJSIA7","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"pith_short_8","alias_value":"IJIS7JF7","created_at":"2026-07-01T02:17:47Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:IJIS7JF7KANJSIA7E2LZLYDQRJ","target":"record","payload":{"canonical_record":{"source":{"id":"2606.32038","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-30T17:59:32Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"f5cd3d38c14be2ac271daa40f8c7c712f859eead97b56826b54ef4f9a316b4b1","abstract_canon_sha256":"c9930e941d73e7c336a5e6edcf0c293963c26d9703ca69251b524fc205169d08"},"schema_version":"1.0"},"canonical_sha256":"42512fa4bf501a99201f269795e0708a42f054cf2340e532911e95449e652531","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-01T02:17:47.974323Z","signature_b64":"ETJTgXQ7Rr7svLf98FKKbf+JHr3rOw4xFY1QhdegZBsV4x56IGN0gU/a9vRrZy1ZcSzSWA6/no1aFBlgehkyAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"42512fa4bf501a99201f269795e0708a42f054cf2340e532911e95449e652531","last_reissued_at":"2026-07-01T02:17:47.973914Z","signature_status":"signed_v1","first_computed_at":"2026-07-01T02:17:47.973914Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.32038","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-01T02:17:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wSXIUBGnTXd8wFNAosNXOkl5zQ5AnN+eQVolPu504ngSuZPyssR+ZxhYEzT4GXK61WECsGBd5pnq2fUY8XypDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T23:47:44.939058Z"},"content_sha256":"217b7041af46311faf8058da2c75570f4790188982b65db215038b6cc17aef84","schema_version":"1.0","event_id":"sha256:217b7041af46311faf8058da2c75570f4790188982b65db215038b6cc17aef84"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:IJIS7JF7KANJSIA7E2LZLYDQRJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Introspective Coupling: Self-Explanation Training Tracks Behavioral Change Despite Fixed Supervision","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Belinda Z. Li, Jacob Andreas, Laura Ruis, Zifan Carl Guo","submitted_at":"2026-06-30T17:59:32Z","abstract_excerpt":"When does training language models (LMs) to generate explanations of their predictions yield faithful introspection, rather than superficial imitation? We study LMs trained to explain which features of their inputs influenced their behavior, using models' counterfactual behavior on modified inputs as supervision. Surprisingly, we find that LMs trained on fixed counterfactual explanations derived from earlier checkpoints of themselves, or even from behaviorally similar models in different families, frequently produce explanations more faithful to their own current behaviors than to those of the"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.32038","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.32038/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-01T02:17:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+D7HAfq7dsLbwR0dem7VxyJmJj96VfcGzstS43MBhIobknk7P3UBlAibkayh9Ed/E7Op7gWf/YUM/NlqglEuAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T23:47:44.939810Z"},"content_sha256":"ae96e5cb99b999961115d0ca58ce142ff2eeea836d7aad4004c7967b92c55c13","schema_version":"1.0","event_id":"sha256:ae96e5cb99b999961115d0ca58ce142ff2eeea836d7aad4004c7967b92c55c13"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IJIS7JF7KANJSIA7E2LZLYDQRJ/bundle.json","state_url":"https://pith.science/pith/IJIS7JF7KANJSIA7E2LZLYDQRJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IJIS7JF7KANJSIA7E2LZLYDQRJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-01T23:47:44Z","links":{"resolver":"https://pith.science/pith/IJIS7JF7KANJSIA7E2LZLYDQRJ","bundle":"https://pith.science/pith/IJIS7JF7KANJSIA7E2LZLYDQRJ/bundle.json","state":"https://pith.science/pith/IJIS7JF7KANJSIA7E2LZLYDQRJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IJIS7JF7KANJSIA7E2LZLYDQRJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:IJIS7JF7KANJSIA7E2LZLYDQRJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c9930e941d73e7c336a5e6edcf0c293963c26d9703ca69251b524fc205169d08","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-30T17:59:32Z","title_canon_sha256":"f5cd3d38c14be2ac271daa40f8c7c712f859eead97b56826b54ef4f9a316b4b1"},"schema_version":"1.0","source":{"id":"2606.32038","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.32038","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"arxiv_version","alias_value":"2606.32038v1","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.32038","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"pith_short_12","alias_value":"IJIS7JF7KANJ","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"pith_short_16","alias_value":"IJIS7JF7KANJSIA7","created_at":"2026-07-01T02:17:47Z"},{"alias_kind":"pith_short_8","alias_value":"IJIS7JF7","created_at":"2026-07-01T02:17:47Z"}],"graph_snapshots":[{"event_id":"sha256:ae96e5cb99b999961115d0ca58ce142ff2eeea836d7aad4004c7967b92c55c13","target":"graph","created_at":"2026-07-01T02:17:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.32038/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"When does training language models (LMs) to generate explanations of their predictions yield faithful introspection, rather than superficial imitation? We study LMs trained to explain which features of their inputs influenced their behavior, using models' counterfactual behavior on modified inputs as supervision. Surprisingly, we find that LMs trained on fixed counterfactual explanations derived from earlier checkpoints of themselves, or even from behaviorally similar models in different families, frequently produce explanations more faithful to their own current behaviors than to those of the","authors_text":"Belinda Z. Li, Jacob Andreas, Laura Ruis, Zifan Carl Guo","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-30T17:59:32Z","title":"Introspective Coupling: Self-Explanation Training Tracks Behavioral Change Despite Fixed Supervision"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.32038","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:217b7041af46311faf8058da2c75570f4790188982b65db215038b6cc17aef84","target":"record","created_at":"2026-07-01T02:17:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c9930e941d73e7c336a5e6edcf0c293963c26d9703ca69251b524fc205169d08","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-30T17:59:32Z","title_canon_sha256":"f5cd3d38c14be2ac271daa40f8c7c712f859eead97b56826b54ef4f9a316b4b1"},"schema_version":"1.0","source":{"id":"2606.32038","kind":"arxiv","version":1}},"canonical_sha256":"42512fa4bf501a99201f269795e0708a42f054cf2340e532911e95449e652531","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"42512fa4bf501a99201f269795e0708a42f054cf2340e532911e95449e652531","first_computed_at":"2026-07-01T02:17:47.973914Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-01T02:17:47.973914Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ETJTgXQ7Rr7svLf98FKKbf+JHr3rOw4xFY1QhdegZBsV4x56IGN0gU/a9vRrZy1ZcSzSWA6/no1aFBlgehkyAw==","signature_status":"signed_v1","signed_at":"2026-07-01T02:17:47.974323Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.32038","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:217b7041af46311faf8058da2c75570f4790188982b65db215038b6cc17aef84","sha256:ae96e5cb99b999961115d0ca58ce142ff2eeea836d7aad4004c7967b92c55c13"],"state_sha256":"f64cd1a6291d18df01b7630c5a2a309eb14848cf8b6d6120c4759cb2c3c3ee06"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qF9Bzhblis+zG3rKt6qhuxVAv4/NWSjSb4h6Zaoq80s09TLLGBou1y5syE2We5V/GOzP60NxFxEk0jAQv9zQAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-01T23:47:44.943545Z","bundle_sha256":"b978fd60e5a26d646c15ffb6f8662c4ce3cfee5f23ad12d0a7145ae783ae491f"}}