{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:5G3EPDVTS2G55TY3AROGZXSEVF","short_pith_number":"pith:5G3EPDVT","schema_version":"1.0","canonical_sha256":"e9b6478eb3968ddecf1b045c6cde44a964062e89114dec9ad4ac5269c41b20e3","source":{"kind":"arxiv","id":"1808.08949","version":2},"attestation_state":"computed","paper":{"title":"Dissecting Contextual Word Embeddings: Architecture and Representation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Luke Zettlemoyer, Mark Neumann, Matthew E. Peters, Wen-tau Yih","submitted_at":"2018-08-27T17:54:29Z","abstract_excerpt":"Contextual word representations derived from pre-trained bidirectional language models (biLMs) have recently been shown to provide significant improvements to the state of the art for a wide range of NLP tasks. However, many questions remain as to how and why these models are so effective. In this paper, we present a detailed empirical study of how the choice of neural architecture (e.g. LSTM, CNN, or self attention) influences both end task accuracy and qualitative properties of the representations that are learned. We show there is a tradeoff between speed and accuracy, but all architectures"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1808.08949","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-27T17:54:29Z","cross_cats_sorted":[],"title_canon_sha256":"43678cdadedf641227cd880ebd57dd9a2a1a65bd32f1f5c0f47f25d65267d769","abstract_canon_sha256":"149963d9e7cbb44c0c906ec1717ad597ec7a888269675cd75230a367e8239648"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:04:35.419481Z","signature_b64":"hstROllvNUFBgsbd3tXTPKSd7HgtUqtnk4zZua4aEXvF6txfGPIGuocSzZqkeE3WHNTu+I/01IFo1X9+jXW+DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e9b6478eb3968ddecf1b045c6cde44a964062e89114dec9ad4ac5269c41b20e3","last_reissued_at":"2026-05-18T00:04:35.418893Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:04:35.418893Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Dissecting Contextual Word Embeddings: Architecture and Representation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Luke Zettlemoyer, Mark Neumann, Matthew E. Peters, Wen-tau Yih","submitted_at":"2018-08-27T17:54:29Z","abstract_excerpt":"Contextual word representations derived from pre-trained bidirectional language models (biLMs) have recently been shown to provide significant improvements to the state of the art for a wide range of NLP tasks. However, many questions remain as to how and why these models are so effective. In this paper, we present a detailed empirical study of how the choice of neural architecture (e.g. LSTM, CNN, or self attention) influences both end task accuracy and qualitative properties of the representations that are learned. We show there is a tradeoff between speed and accuracy, but all architectures"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.08949","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1808.08949","created_at":"2026-05-18T00:04:35.418982+00:00"},{"alias_kind":"arxiv_version","alias_value":"1808.08949v2","created_at":"2026-05-18T00:04:35.418982+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.08949","created_at":"2026-05-18T00:04:35.418982+00:00"},{"alias_kind":"pith_short_12","alias_value":"5G3EPDVTS2G5","created_at":"2026-05-18T12:32:08.215937+00:00"},{"alias_kind":"pith_short_16","alias_value":"5G3EPDVTS2G55TY3","created_at":"2026-05-18T12:32:08.215937+00:00"},{"alias_kind":"pith_short_8","alias_value":"5G3EPDVT","created_at":"2026-05-18T12:32:08.215937+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2509.07794","citing_title":"Query Expansion in the Age of Pre-trained and Large Language Models: A Comprehensive Survey","ref_index":86,"is_internal_anchor":true},{"citing_arxiv_id":"2601.13334","citing_title":"SEER: Spectral Entropy Encoding of Roles for Context-Aware Attention-Based Design Pattern Detection","ref_index":16,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF","json":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF.json","graph_json":"https://pith.science/api/pith-number/5G3EPDVTS2G55TY3AROGZXSEVF/graph.json","events_json":"https://pith.science/api/pith-number/5G3EPDVTS2G55TY3AROGZXSEVF/events.json","paper":"https://pith.science/paper/5G3EPDVT"},"agent_actions":{"view_html":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF","download_json":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF.json","view_paper":"https://pith.science/paper/5G3EPDVT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1808.08949&json=true","fetch_graph":"https://pith.science/api/pith-number/5G3EPDVTS2G55TY3AROGZXSEVF/graph.json","fetch_events":"https://pith.science/api/pith-number/5G3EPDVTS2G55TY3AROGZXSEVF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF/action/storage_attestation","attest_author":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF/action/author_attestation","sign_citation":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF/action/citation_signature","submit_replication":"https://pith.science/pith/5G3EPDVTS2G55TY3AROGZXSEVF/action/replication_record"}},"created_at":"2026-05-18T00:04:35.418982+00:00","updated_at":"2026-05-18T00:04:35.418982+00:00"}