{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:3BPO6NN77NUWATGX7TTLQSRPAX","short_pith_number":"pith:3BPO6NN7","canonical_record":{"source":{"id":"2604.12277","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-14T04:43:29Z","cross_cats_sorted":[],"title_canon_sha256":"2e0c2f916334614b0721e7be1127eb6da8adaa369e9c66cd5d70afb6c0db44d1","abstract_canon_sha256":"156c22b847e73b5ef7b0b920bd4f8f7b21d53ca768756be846ee31ad569a5c31"},"schema_version":"1.0"},"canonical_sha256":"d85eef35bffb69604cd7fce6b84a2f05e445694e14479ac48fd9c74b693e26cc","source":{"kind":"arxiv","id":"2604.12277","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.12277","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"arxiv_version","alias_value":"2604.12277v2","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.12277","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"pith_short_12","alias_value":"3BPO6NN77NUW","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"pith_short_16","alias_value":"3BPO6NN77NUWATGX","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"pith_short_8","alias_value":"3BPO6NN7","created_at":"2026-06-09T01:04:42Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:3BPO6NN77NUWATGX7TTLQSRPAX","target":"record","payload":{"canonical_record":{"source":{"id":"2604.12277","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-14T04:43:29Z","cross_cats_sorted":[],"title_canon_sha256":"2e0c2f916334614b0721e7be1127eb6da8adaa369e9c66cd5d70afb6c0db44d1","abstract_canon_sha256":"156c22b847e73b5ef7b0b920bd4f8f7b21d53ca768756be846ee31ad569a5c31"},"schema_version":"1.0"},"canonical_sha256":"d85eef35bffb69604cd7fce6b84a2f05e445694e14479ac48fd9c74b693e26cc","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:04:42.787558Z","signature_b64":"mGQCLbgkb6vFKH6G0ogj4+Svk4aDEGlTGomwNLsQW1gEe3ZqE95FW8x82ozCVoB58oZnIs6INNs9t0J04FlJBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d85eef35bffb69604cd7fce6b84a2f05e445694e14479ac48fd9c74b693e26cc","last_reissued_at":"2026-06-09T01:04:42.787145Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:04:42.787145Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.12277","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T01:04:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2TQ59BD6t0ZGGEAzDLkzpu1KozutbVECtbd0CHQRF+JmL1GghoeOFKRDrLT0c74h1NAEXqwabwUu+vvqdZOECw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T08:51:45.627680Z"},"content_sha256":"6ffb4e6013eec2e94e9f8bd191dee012a7afecbff5a269d90fbc3f739c91ce67","schema_version":"1.0","event_id":"sha256:6ffb4e6013eec2e94e9f8bd191dee012a7afecbff5a269d90fbc3f739c91ce67"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:3BPO6NN77NUWATGX7TTLQSRPAX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Models Know Their Shortcuts: Deployment-Time Shortcut Mitigation","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"Language models can identify and mitigate their own token-level shortcuts at deployment time using only gradient attributions from the biased model itself.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Carl Kingsford, G\\\"un Kaynar, Jiayi Li, Shijie Tang, Shiyi Du","submitted_at":"2026-04-14T04:43:29Z","abstract_excerpt":"Pretrained text encoders are prone to shortcut learning, relying on token-label correlations that fail once the distribution shifts in deployment. Existing shortcut mitigation methods mainly operate at training time and assume access to training data, training dynamics, or shortcut annotations, which are hardly available during deployment, where only the converged model remains. We show that this model alone suffices to mitigate shortcuts during deployment: a biased model internalizes a signal of its learned shortcuts that can be captured via unsupervised gradient-based attribution. We further"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Shortcut Guardrail, a deployment-time framework that mitigates token-level shortcuts without access to the original training data or shortcut annotations... improves overall accuracy and worst-group accuracy over the unmitigated model under distribution shifts while preserving in-distribution performance.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"Gradient-based attribution on a biased model reliably highlights shortcut tokens, and training a LoRA-based debiasing module with the Masked Contrastive Learning objective produces consistent representations without original data or annotations.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Shortcut Guardrail mitigates token-level shortcuts in pretrained language models at deployment time via gradient-based token identification and a LoRA-trained Masked Contrastive Learning module, improving accuracy under distribution shifts while preserving in-distribution performance.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Language models can identify and mitigate their own token-level shortcuts at deployment time using only gradient attributions from the biased model itself.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"bce2392a08ffb4905b3eb98ad182951b6b8b4c3a00ac1be80b2b17cd388846a0"},"source":{"id":"2604.12277","kind":"arxiv","version":2},"verdict":{"id":"0eab5d2f-6ec8-4c2d-9063-28027eaeb8d8","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T16:26:04.060999Z","strongest_claim":"Shortcut Guardrail, a deployment-time framework that mitigates token-level shortcuts without access to the original training data or shortcut annotations... improves overall accuracy and worst-group accuracy over the unmitigated model under distribution shifts while preserving in-distribution performance.","one_line_summary":"Shortcut Guardrail mitigates token-level shortcuts in pretrained language models at deployment time via gradient-based token identification and a LoRA-trained Masked Contrastive Learning module, improving accuracy under distribution shifts while preserving in-distribution performance.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"Gradient-based attribution on a biased model reliably highlights shortcut tokens, and training a LoRA-based debiasing module with the Masked Contrastive Learning objective produces consistent representations without original data or annotations.","pith_extraction_headline":"Language models can identify and mitigate their own token-level shortcuts at deployment time using only gradient attributions from the biased model itself."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.12277/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"0eab5d2f-6ec8-4c2d-9063-28027eaeb8d8"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T01:04:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"H01ilxAAX5ClBqXDIbWK9dMYFbzHkYwEinEd2FjiuFdEAllygDuprmNRmT17Iv/St9+DdgFhwQqORpQ5eNgvAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T08:51:45.628127Z"},"content_sha256":"9aa60fb747472a9bd9cc29f146708b9446460b3c06d2a82c633424a5d5982037","schema_version":"1.0","event_id":"sha256:9aa60fb747472a9bd9cc29f146708b9446460b3c06d2a82c633424a5d5982037"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3BPO6NN77NUWATGX7TTLQSRPAX/bundle.json","state_url":"https://pith.science/pith/3BPO6NN77NUWATGX7TTLQSRPAX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3BPO6NN77NUWATGX7TTLQSRPAX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-28T08:51:45Z","links":{"resolver":"https://pith.science/pith/3BPO6NN77NUWATGX7TTLQSRPAX","bundle":"https://pith.science/pith/3BPO6NN77NUWATGX7TTLQSRPAX/bundle.json","state":"https://pith.science/pith/3BPO6NN77NUWATGX7TTLQSRPAX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3BPO6NN77NUWATGX7TTLQSRPAX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3BPO6NN77NUWATGX7TTLQSRPAX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"156c22b847e73b5ef7b0b920bd4f8f7b21d53ca768756be846ee31ad569a5c31","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-14T04:43:29Z","title_canon_sha256":"2e0c2f916334614b0721e7be1127eb6da8adaa369e9c66cd5d70afb6c0db44d1"},"schema_version":"1.0","source":{"id":"2604.12277","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.12277","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"arxiv_version","alias_value":"2604.12277v2","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.12277","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"pith_short_12","alias_value":"3BPO6NN77NUW","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"pith_short_16","alias_value":"3BPO6NN77NUWATGX","created_at":"2026-06-09T01:04:42Z"},{"alias_kind":"pith_short_8","alias_value":"3BPO6NN7","created_at":"2026-06-09T01:04:42Z"}],"graph_snapshots":[{"event_id":"sha256:9aa60fb747472a9bd9cc29f146708b9446460b3c06d2a82c633424a5d5982037","target":"graph","created_at":"2026-06-09T01:04:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Shortcut Guardrail, a deployment-time framework that mitigates token-level shortcuts without access to the original training data or shortcut annotations... improves overall accuracy and worst-group accuracy over the unmitigated model under distribution shifts while preserving in-distribution performance."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"Gradient-based attribution on a biased model reliably highlights shortcut tokens, and training a LoRA-based debiasing module with the Masked Contrastive Learning objective produces consistent representations without original data or annotations."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Shortcut Guardrail mitigates token-level shortcuts in pretrained language models at deployment time via gradient-based token identification and a LoRA-trained Masked Contrastive Learning module, improving accuracy under distribution shifts while preserving in-distribution performance."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Language models can identify and mitigate their own token-level shortcuts at deployment time using only gradient attributions from the biased model itself."}],"snapshot_sha256":"bce2392a08ffb4905b3eb98ad182951b6b8b4c3a00ac1be80b2b17cd388846a0"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.12277/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Pretrained text encoders are prone to shortcut learning, relying on token-label correlations that fail once the distribution shifts in deployment. Existing shortcut mitigation methods mainly operate at training time and assume access to training data, training dynamics, or shortcut annotations, which are hardly available during deployment, where only the converged model remains. We show that this model alone suffices to mitigate shortcuts during deployment: a biased model internalizes a signal of its learned shortcuts that can be captured via unsupervised gradient-based attribution. We further","authors_text":"Carl Kingsford, G\\\"un Kaynar, Jiayi Li, Shijie Tang, Shiyi Du","cross_cats":[],"headline":"Language models can identify and mitigate their own token-level shortcuts at deployment time using only gradient attributions from the biased model itself.","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-14T04:43:29Z","title":"Models Know Their Shortcuts: Deployment-Time Shortcut Mitigation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.12277","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T16:26:04.060999Z","id":"0eab5d2f-6ec8-4c2d-9063-28027eaeb8d8","model_set":{"reader":"grok-4.3"},"one_line_summary":"Shortcut Guardrail mitigates token-level shortcuts in pretrained language models at deployment time via gradient-based token identification and a LoRA-trained Masked Contrastive Learning module, improving accuracy under distribution shifts while preserving in-distribution performance.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Language models can identify and mitigate their own token-level shortcuts at deployment time using only gradient attributions from the biased model itself.","strongest_claim":"Shortcut Guardrail, a deployment-time framework that mitigates token-level shortcuts without access to the original training data or shortcut annotations... improves overall accuracy and worst-group accuracy over the unmitigated model under distribution shifts while preserving in-distribution performance.","weakest_assumption":"Gradient-based attribution on a biased model reliably highlights shortcut tokens, and training a LoRA-based debiasing module with the Masked Contrastive Learning objective produces consistent representations without original data or annotations."}},"verdict_id":"0eab5d2f-6ec8-4c2d-9063-28027eaeb8d8"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6ffb4e6013eec2e94e9f8bd191dee012a7afecbff5a269d90fbc3f739c91ce67","target":"record","created_at":"2026-06-09T01:04:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"156c22b847e73b5ef7b0b920bd4f8f7b21d53ca768756be846ee31ad569a5c31","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-14T04:43:29Z","title_canon_sha256":"2e0c2f916334614b0721e7be1127eb6da8adaa369e9c66cd5d70afb6c0db44d1"},"schema_version":"1.0","source":{"id":"2604.12277","kind":"arxiv","version":2}},"canonical_sha256":"d85eef35bffb69604cd7fce6b84a2f05e445694e14479ac48fd9c74b693e26cc","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d85eef35bffb69604cd7fce6b84a2f05e445694e14479ac48fd9c74b693e26cc","first_computed_at":"2026-06-09T01:04:42.787145Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T01:04:42.787145Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mGQCLbgkb6vFKH6G0ogj4+Svk4aDEGlTGomwNLsQW1gEe3ZqE95FW8x82ozCVoB58oZnIs6INNs9t0J04FlJBg==","signature_status":"signed_v1","signed_at":"2026-06-09T01:04:42.787558Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.12277","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6ffb4e6013eec2e94e9f8bd191dee012a7afecbff5a269d90fbc3f739c91ce67","sha256:9aa60fb747472a9bd9cc29f146708b9446460b3c06d2a82c633424a5d5982037"],"state_sha256":"d99628ec8bd830e12c79200eaa95271c2e678503506acc08f87baf5aa4ccf475"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"e90489QQT7vjH/VgDVozmRzJ0e3MtA6Wlslzd0Dxs6F9v+O/SilJHqYpZZLuY9dgHc7o5SWOgNDVFbgAJnKVDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-28T08:51:45.630364Z","bundle_sha256":"7364b941fd1a2053a9ba9def963e208c3f5ca2fe555dd95ed62060e6221418c0"}}