{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:BTT56JT5PUHMWXB3KFHLZTWW5H","short_pith_number":"pith:BTT56JT5","schema_version":"1.0","canonical_sha256":"0ce7df267d7d0ecb5c3b514ebcced6e9db712a8b507153996e4e47ea3b41313c","source":{"kind":"arxiv","id":"2602.09075","version":3},"attestation_state":"computed","paper":{"title":"Learning to Remember, Learn, and Forget in Attention-Based Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Djohan Bonnet, Elidona Skhikerujah, Emre Neftci, Jamie Lohoff, Jan Finkbeiner","submitted_at":"2026-02-09T16:09:51Z","abstract_excerpt":"In-Context Learning (ICL) in transformers acts as an online associative memory and is believed to underpin their high performance on complex sequence processing tasks. However, in gated linear attention models, this memory has a fixed capacity and is prone to interference, especially for long sequences. We propose Palimpsa, a self-attention model that views ICL as a continual learning problem that must address a stability-plasticity dilemma. Palimpsa uses Bayesian metaplasticity, where the plasticity of each attention state is tied to an importance state grounded by a prior distribution that c"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.09075","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-09T16:09:51Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"df0248989134e262d91e566bf6bf47cf90f276770b633bdd0c76ed2117bc0ef5","abstract_canon_sha256":"e3b80a5cacca21f25902124ed32daae9aebfec2ad61544a4ba68f619706e9e7a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:42.661145Z","signature_b64":"HtpYNp1ilFuFEpL4bAG2D1x0rLlkqPEVQbV4cqTpDDmJR4oEkL8iXakkoISGitRTmg2Cg4+qPQ5aIAdmpdcgCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0ce7df267d7d0ecb5c3b514ebcced6e9db712a8b507153996e4e47ea3b41313c","last_reissued_at":"2026-06-02T01:03:42.660695Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:42.660695Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning to Remember, Learn, and Forget in Attention-Based Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Djohan Bonnet, Elidona Skhikerujah, Emre Neftci, Jamie Lohoff, Jan Finkbeiner","submitted_at":"2026-02-09T16:09:51Z","abstract_excerpt":"In-Context Learning (ICL) in transformers acts as an online associative memory and is believed to underpin their high performance on complex sequence processing tasks. However, in gated linear attention models, this memory has a fixed capacity and is prone to interference, especially for long sequences. We propose Palimpsa, a self-attention model that views ICL as a continual learning problem that must address a stability-plasticity dilemma. Palimpsa uses Bayesian metaplasticity, where the plasticity of each attention state is tied to an importance state grounded by a prior distribution that c"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.09075","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.09075/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.09075","created_at":"2026-06-02T01:03:42.660755+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.09075v3","created_at":"2026-06-02T01:03:42.660755+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.09075","created_at":"2026-06-02T01:03:42.660755+00:00"},{"alias_kind":"pith_short_12","alias_value":"BTT56JT5PUHM","created_at":"2026-06-02T01:03:42.660755+00:00"},{"alias_kind":"pith_short_16","alias_value":"BTT56JT5PUHMWXB3","created_at":"2026-06-02T01:03:42.660755+00:00"},{"alias_kind":"pith_short_8","alias_value":"BTT56JT5","created_at":"2026-06-02T01:03:42.660755+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H","json":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H.json","graph_json":"https://pith.science/api/pith-number/BTT56JT5PUHMWXB3KFHLZTWW5H/graph.json","events_json":"https://pith.science/api/pith-number/BTT56JT5PUHMWXB3KFHLZTWW5H/events.json","paper":"https://pith.science/paper/BTT56JT5"},"agent_actions":{"view_html":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H","download_json":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H.json","view_paper":"https://pith.science/paper/BTT56JT5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.09075&json=true","fetch_graph":"https://pith.science/api/pith-number/BTT56JT5PUHMWXB3KFHLZTWW5H/graph.json","fetch_events":"https://pith.science/api/pith-number/BTT56JT5PUHMWXB3KFHLZTWW5H/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H/action/storage_attestation","attest_author":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H/action/author_attestation","sign_citation":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H/action/citation_signature","submit_replication":"https://pith.science/pith/BTT56JT5PUHMWXB3KFHLZTWW5H/action/replication_record"}},"created_at":"2026-06-02T01:03:42.660755+00:00","updated_at":"2026-06-02T01:03:42.660755+00:00"}