{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:5GULZ3RTW7W3IQ5S53XNNY3NDC","short_pith_number":"pith:5GULZ3RT","schema_version":"1.0","canonical_sha256":"e9a8bcee33b7edb443b2eeeed6e36d189a3d5bc395639e5259109f989c9fedf4","source":{"kind":"arxiv","id":"2507.06457","version":2},"attestation_state":"computed","paper":{"title":"A Systematic Analysis of Hybrid Linear Attention","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dustin Wang, Ge Zhang, Jason Eshraghian, Jibin Wu, Rui-Jie Zhu, Steven Abreu, Taylor Kergan, Wenhao Huang, Yong Shan, Yuhong Chou, Yuqi Pan, Zheng Li","submitted_at":"2025-07-08T23:54:11Z","abstract_excerpt":"Transformers face quadratic complexity and memory issues with long sequences, prompting the adoption of linear attention mechanisms using fixed-size hidden states. However, linear models often suffer from limited recall performance, leading to hybrid architectures that combine linear and full attention layers. Despite extensive hybrid architecture research, the choice of linear attention component has not been deeply explored. We systematically evaluate various linear attention models across generations - vector recurrences to advanced gating mechanisms - both standalone and hybridized. To ena"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2507.06457","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-07-08T23:54:11Z","cross_cats_sorted":[],"title_canon_sha256":"09cc57158b8c4a8eec11d2e62edb0370fd431b47251dd7811d6346b7d488ed93","abstract_canon_sha256":"c46ad06cd2589feba05243831bd15fce537c4a973546bda83f807fdb6b9d34cf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-25T00:18:11.347460Z","signature_b64":"/dpQWHtTZSCiw8iCUiYLFAIoF8T3VJgp70TkMXAUhZh4KGN4gN8iwK4wgI1fuYQ+XOk2N/8P9ubuEFeC3vbwAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e9a8bcee33b7edb443b2eeeed6e36d189a3d5bc395639e5259109f989c9fedf4","last_reissued_at":"2026-06-25T00:18:11.346922Z","signature_status":"signed_v1","first_computed_at":"2026-06-25T00:18:11.346922Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Systematic Analysis of Hybrid Linear Attention","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dustin Wang, Ge Zhang, Jason Eshraghian, Jibin Wu, Rui-Jie Zhu, Steven Abreu, Taylor Kergan, Wenhao Huang, Yong Shan, Yuhong Chou, Yuqi Pan, Zheng Li","submitted_at":"2025-07-08T23:54:11Z","abstract_excerpt":"Transformers face quadratic complexity and memory issues with long sequences, prompting the adoption of linear attention mechanisms using fixed-size hidden states. However, linear models often suffer from limited recall performance, leading to hybrid architectures that combine linear and full attention layers. Despite extensive hybrid architecture research, the choice of linear attention component has not been deeply explored. We systematically evaluate various linear attention models across generations - vector recurrences to advanced gating mechanisms - both standalone and hybridized. To ena"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.06457","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2507.06457/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2507.06457","created_at":"2026-06-25T00:18:11.346983+00:00"},{"alias_kind":"arxiv_version","alias_value":"2507.06457v2","created_at":"2026-06-25T00:18:11.346983+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.06457","created_at":"2026-06-25T00:18:11.346983+00:00"},{"alias_kind":"pith_short_12","alias_value":"5GULZ3RTW7W3","created_at":"2026-06-25T00:18:11.346983+00:00"},{"alias_kind":"pith_short_16","alias_value":"5GULZ3RTW7W3IQ5S","created_at":"2026-06-25T00:18:11.346983+00:00"},{"alias_kind":"pith_short_8","alias_value":"5GULZ3RT","created_at":"2026-06-25T00:18:11.346983+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":8,"sample":[{"citing_arxiv_id":"2606.28831","citing_title":"HARD-KV: Head-Adaptive Regularization for Decoding-time KV Compression","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2606.30562","citing_title":"Morphing into Hybrid Attention Models","ref_index":58,"is_internal_anchor":true},{"citing_arxiv_id":"2509.24552","citing_title":"Short window attention enables long-term memorization","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2510.04800","citing_title":"Hybrid Architectures for Language Models: Systematic Analysis and Design Insights","ref_index":55,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24715","citing_title":"Long-Context Aware Upcycling: A New Frontier for Hybrid LLM Scaling","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05838","citing_title":"MDN: Parallelizing Stepwise Momentum for Delta Linear Attention","ref_index":112,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01106","citing_title":"Component-Aware Self-Speculative Decoding in Hybrid Language Models","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18058","citing_title":"Sonata: A Hybrid World Model for Inertial Kinematics under Clinical Data Scarcity","ref_index":45,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC","json":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC.json","graph_json":"https://pith.science/api/pith-number/5GULZ3RTW7W3IQ5S53XNNY3NDC/graph.json","events_json":"https://pith.science/api/pith-number/5GULZ3RTW7W3IQ5S53XNNY3NDC/events.json","paper":"https://pith.science/paper/5GULZ3RT"},"agent_actions":{"view_html":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC","download_json":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC.json","view_paper":"https://pith.science/paper/5GULZ3RT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2507.06457&json=true","fetch_graph":"https://pith.science/api/pith-number/5GULZ3RTW7W3IQ5S53XNNY3NDC/graph.json","fetch_events":"https://pith.science/api/pith-number/5GULZ3RTW7W3IQ5S53XNNY3NDC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC/action/storage_attestation","attest_author":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC/action/author_attestation","sign_citation":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC/action/citation_signature","submit_replication":"https://pith.science/pith/5GULZ3RTW7W3IQ5S53XNNY3NDC/action/replication_record"}},"created_at":"2026-06-25T00:18:11.346983+00:00","updated_at":"2026-06-25T00:18:11.346983+00:00"}