{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2023:DA3JI36F7ZOCO2XQYBJIEUJKKT","short_pith_number":"pith:DA3JI36F","schema_version":"1.0","canonical_sha256":"1836946fc5fe5c276af0c05282512a54d321d70434f5a52ca6d88436a57d65dc","source":{"kind":"arxiv","id":"2303.15619","version":2},"attestation_state":"computed","paper":{"title":"Typhoon: Towards an Effective Task-Specific Masking Strategy for Pre-trained Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Bruce Changlong Xu, Hashem Elezabi, Muhammed Shahir Abdurrahman","submitted_at":"2023-03-27T22:27:23Z","abstract_excerpt":"The choice of \\emph{which} tokens to mask is a central, under-examined design decision in masked language modeling (MLM). Standard pretraining masks tokens uniformly at random, but several studies show that more informative masking targets can improve downstream performance. We study masking as a \\emph{task-adaptive} component of the fine-tuning pipeline and introduce \\textbf{Typhoon}, a masking strategy that uses the gradient of the task loss with respect to one-hot token inputs to estimate, online, how much each token type contributes to the objective. Typhoon maintains an exponential moving"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2303.15619","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2023-03-27T22:27:23Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"8cb62f3f30057e8e4f7f2bbe142407eaa289f400e74ac4cb7fae7847ed12c095","abstract_canon_sha256":"dc2fc61f58bab28307d01a4af8622268bb0ab3c657aae18bd2ea3692ff3001cc"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T01:05:02.849475Z","signature_b64":"P0tj936UbHdBBx2EsubEblrtBqex6m9Crd18BPCPaeLvZE5ZPiiRAeoHnhkfwl+EgFX6D4zcElMgbBgvTqiHCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1836946fc5fe5c276af0c05282512a54d321d70434f5a52ca6d88436a57d65dc","last_reissued_at":"2026-06-03T01:05:02.848997Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T01:05:02.848997Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Typhoon: Towards an Effective Task-Specific Masking Strategy for Pre-trained Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Bruce Changlong Xu, Hashem Elezabi, Muhammed Shahir Abdurrahman","submitted_at":"2023-03-27T22:27:23Z","abstract_excerpt":"The choice of \\emph{which} tokens to mask is a central, under-examined design decision in masked language modeling (MLM). Standard pretraining masks tokens uniformly at random, but several studies show that more informative masking targets can improve downstream performance. We study masking as a \\emph{task-adaptive} component of the fine-tuning pipeline and introduce \\textbf{Typhoon}, a masking strategy that uses the gradient of the task loss with respect to one-hot token inputs to estimate, online, how much each token type contributes to the objective. Typhoon maintains an exponential moving"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2303.15619","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2303.15619/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2303.15619","created_at":"2026-06-03T01:05:02.849061+00:00"},{"alias_kind":"arxiv_version","alias_value":"2303.15619v2","created_at":"2026-06-03T01:05:02.849061+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2303.15619","created_at":"2026-06-03T01:05:02.849061+00:00"},{"alias_kind":"pith_short_12","alias_value":"DA3JI36F7ZOC","created_at":"2026-06-03T01:05:02.849061+00:00"},{"alias_kind":"pith_short_16","alias_value":"DA3JI36F7ZOCO2XQ","created_at":"2026-06-03T01:05:02.849061+00:00"},{"alias_kind":"pith_short_8","alias_value":"DA3JI36F","created_at":"2026-06-03T01:05:02.849061+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT","json":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT.json","graph_json":"https://pith.science/api/pith-number/DA3JI36F7ZOCO2XQYBJIEUJKKT/graph.json","events_json":"https://pith.science/api/pith-number/DA3JI36F7ZOCO2XQYBJIEUJKKT/events.json","paper":"https://pith.science/paper/DA3JI36F"},"agent_actions":{"view_html":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT","download_json":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT.json","view_paper":"https://pith.science/paper/DA3JI36F","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2303.15619&json=true","fetch_graph":"https://pith.science/api/pith-number/DA3JI36F7ZOCO2XQYBJIEUJKKT/graph.json","fetch_events":"https://pith.science/api/pith-number/DA3JI36F7ZOCO2XQYBJIEUJKKT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT/action/storage_attestation","attest_author":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT/action/author_attestation","sign_citation":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT/action/citation_signature","submit_replication":"https://pith.science/pith/DA3JI36F7ZOCO2XQYBJIEUJKKT/action/replication_record"}},"created_at":"2026-06-03T01:05:02.849061+00:00","updated_at":"2026-06-03T01:05:02.849061+00:00"}