{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:OLSVQJKIXYNCLQOMBKK2YFOBAP","short_pith_number":"pith:OLSVQJKI","schema_version":"1.0","canonical_sha256":"72e5582548be1a25c1cc0a95ac15c103e6018fe90c9802e31e3c32f59aff0611","source":{"kind":"arxiv","id":"2606.06738","version":1},"attestation_state":"computed","paper":{"title":"Modular Monolingual Adaptation using Pretrained Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Nalin Kumar, Ond\\v{r}ej Du\\v{s}ek","submitted_at":"2026-06-04T21:51:50Z","abstract_excerpt":"Building monolingual language models (LMs) for low-resource languages typically relies on adapting pretrained language models (PLMs) by finetuning the whole model on the target language. This approach is widely favored over training from scratch, as it enables effective knowledge transfer. Additionally, prior work has shown that using a language-specific tokenizer can enhance the adaptability. In this work, we hypothesize that full model tuning is often unnecessary and propose a more modular approach. Specifically, we replace the tokens, freeze the corresponding embeddings, and tune the rest o"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.06738","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-04T21:51:50Z","cross_cats_sorted":[],"title_canon_sha256":"43a83a1bf69b41774e8ca93cd4c25ef4a5c0656d51db1e212f4da72a4cdc809a","abstract_canon_sha256":"1746cb7366efcdb5873d4e09cd9d691dbf0cd77d73b494a70ebcb8fb023503ac"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T01:04:25.442019Z","signature_b64":"t3xJmRoPaZqVNyQr6a6C0CRjicC6ly4brnMKSr3I6YqbPtCE0JuQYUlxxidEQoGNKX+1cFC+malrLVSWSOAwBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"72e5582548be1a25c1cc0a95ac15c103e6018fe90c9802e31e3c32f59aff0611","last_reissued_at":"2026-06-08T01:04:25.441265Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T01:04:25.441265Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Modular Monolingual Adaptation using Pretrained Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Nalin Kumar, Ond\\v{r}ej Du\\v{s}ek","submitted_at":"2026-06-04T21:51:50Z","abstract_excerpt":"Building monolingual language models (LMs) for low-resource languages typically relies on adapting pretrained language models (PLMs) by finetuning the whole model on the target language. This approach is widely favored over training from scratch, as it enables effective knowledge transfer. Additionally, prior work has shown that using a language-specific tokenizer can enhance the adaptability. In this work, we hypothesize that full model tuning is often unnecessary and propose a more modular approach. Specifically, we replace the tokens, freeze the corresponding embeddings, and tune the rest o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06738","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.06738/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.06738","created_at":"2026-06-08T01:04:25.441394+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.06738v1","created_at":"2026-06-08T01:04:25.441394+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06738","created_at":"2026-06-08T01:04:25.441394+00:00"},{"alias_kind":"pith_short_12","alias_value":"OLSVQJKIXYNC","created_at":"2026-06-08T01:04:25.441394+00:00"},{"alias_kind":"pith_short_16","alias_value":"OLSVQJKIXYNCLQOM","created_at":"2026-06-08T01:04:25.441394+00:00"},{"alias_kind":"pith_short_8","alias_value":"OLSVQJKI","created_at":"2026-06-08T01:04:25.441394+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP","json":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP.json","graph_json":"https://pith.science/api/pith-number/OLSVQJKIXYNCLQOMBKK2YFOBAP/graph.json","events_json":"https://pith.science/api/pith-number/OLSVQJKIXYNCLQOMBKK2YFOBAP/events.json","paper":"https://pith.science/paper/OLSVQJKI"},"agent_actions":{"view_html":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP","download_json":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP.json","view_paper":"https://pith.science/paper/OLSVQJKI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.06738&json=true","fetch_graph":"https://pith.science/api/pith-number/OLSVQJKIXYNCLQOMBKK2YFOBAP/graph.json","fetch_events":"https://pith.science/api/pith-number/OLSVQJKIXYNCLQOMBKK2YFOBAP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP/action/storage_attestation","attest_author":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP/action/author_attestation","sign_citation":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP/action/citation_signature","submit_replication":"https://pith.science/pith/OLSVQJKIXYNCLQOMBKK2YFOBAP/action/replication_record"}},"created_at":"2026-06-08T01:04:25.441394+00:00","updated_at":"2026-06-08T01:04:25.441394+00:00"}