{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:KOPUMWG4GSJ3VKYL32PNXJY2DY","short_pith_number":"pith:KOPUMWG4","schema_version":"1.0","canonical_sha256":"539f4658dc3493baab0bde9edba71a1e017b1324a4fedc6fc59aec6200bfecba","source":{"kind":"arxiv","id":"2606.00605","version":1},"attestation_state":"computed","paper":{"title":"Looped Transformers with Layer Normalization Provably Learn the Power Method","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Chenyang Zhang, Lyumin Wu, Yuan Cao","submitted_at":"2026-05-30T08:05:27Z","abstract_excerpt":"Transformers have achieved remarkable success across a wide range of applications, and a growing body of work suggests that part of their strength comes from their ability to learn and execute algorithmic procedures. However, our understanding of how transformers learn such algorithms remains limited, especially in the presence of layer normalization (LN). In this work, we study principal component prediction as a concrete testbed for understanding the training dynamics of transformers with LN. We prove that a looped linear transformer with LN, trained by gradient descent, converges to a solut"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.00605","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-30T08:05:27Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"c8a085a0342dbe88206fb71aa41383221cef3f72e0b02100f9cde02cbbe6ee17","abstract_canon_sha256":"45bf1b07c2c44f00570da2feb3c2f7dd292aec129381d7d60e20d11cf57bd24e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:59.480000Z","signature_b64":"Pqk8QD4vvMD1xf4mYBmWezCXN5tZRT0LNvst6sWbwTkEmKGJ1p3HfCvY5A+uCE/WUWjqwtjhfvxn+6jy7kzECg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"539f4658dc3493baab0bde9edba71a1e017b1324a4fedc6fc59aec6200bfecba","last_reissued_at":"2026-06-02T01:03:59.479586Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:59.479586Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Looped Transformers with Layer Normalization Provably Learn the Power Method","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Chenyang Zhang, Lyumin Wu, Yuan Cao","submitted_at":"2026-05-30T08:05:27Z","abstract_excerpt":"Transformers have achieved remarkable success across a wide range of applications, and a growing body of work suggests that part of their strength comes from their ability to learn and execute algorithmic procedures. However, our understanding of how transformers learn such algorithms remains limited, especially in the presence of layer normalization (LN). In this work, we study principal component prediction as a concrete testbed for understanding the training dynamics of transformers with LN. We prove that a looped linear transformer with LN, trained by gradient descent, converges to a solut"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.00605","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.00605/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.00605","created_at":"2026-06-02T01:03:59.479645+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.00605v1","created_at":"2026-06-02T01:03:59.479645+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.00605","created_at":"2026-06-02T01:03:59.479645+00:00"},{"alias_kind":"pith_short_12","alias_value":"KOPUMWG4GSJ3","created_at":"2026-06-02T01:03:59.479645+00:00"},{"alias_kind":"pith_short_16","alias_value":"KOPUMWG4GSJ3VKYL","created_at":"2026-06-02T01:03:59.479645+00:00"},{"alias_kind":"pith_short_8","alias_value":"KOPUMWG4","created_at":"2026-06-02T01:03:59.479645+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY","json":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY.json","graph_json":"https://pith.science/api/pith-number/KOPUMWG4GSJ3VKYL32PNXJY2DY/graph.json","events_json":"https://pith.science/api/pith-number/KOPUMWG4GSJ3VKYL32PNXJY2DY/events.json","paper":"https://pith.science/paper/KOPUMWG4"},"agent_actions":{"view_html":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY","download_json":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY.json","view_paper":"https://pith.science/paper/KOPUMWG4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.00605&json=true","fetch_graph":"https://pith.science/api/pith-number/KOPUMWG4GSJ3VKYL32PNXJY2DY/graph.json","fetch_events":"https://pith.science/api/pith-number/KOPUMWG4GSJ3VKYL32PNXJY2DY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY/action/storage_attestation","attest_author":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY/action/author_attestation","sign_citation":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY/action/citation_signature","submit_replication":"https://pith.science/pith/KOPUMWG4GSJ3VKYL32PNXJY2DY/action/replication_record"}},"created_at":"2026-06-02T01:03:59.479645+00:00","updated_at":"2026-06-02T01:03:59.479645+00:00"}