{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:UY6PZHOMB6XWMW36GJXOUFYBGI","short_pith_number":"pith:UY6PZHOM","schema_version":"1.0","canonical_sha256":"a63cfc9dcc0faf665b7e326eea170132323a5c56b068440ccb8191eabd4dded9","source":{"kind":"arxiv","id":"1709.05778","version":1},"attestation_state":"computed","paper":{"title":"Word Vector Enrichment of Low Frequency Words in the Bag-of-Words Model for Short Text Multi-class Classification Problems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Alfred Krzywicki, Bradford Heap, Michael Bain, Susanne Schmeidl, Wayne Wobcke","submitted_at":"2017-09-18T05:00:34Z","abstract_excerpt":"The bag-of-words model is a standard representation of text for many linear classifier learners. In many problem domains, linear classifiers are preferred over more complex models due to their efficiency, robustness and interpretability, and the bag-of-words text representation can capture sufficient information for linear classifiers to make highly accurate predictions. However in settings where there is a large vocabulary, large variance in the frequency of terms in the training corpus, many classes and very short text (e.g., single sentences or document titles) the bag-of-words representati"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1709.05778","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-18T05:00:34Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"d0e8b314426bec0cd876a5ff486d5a8a1fd32f718285f9e3b97609dfb6b947b4","abstract_canon_sha256":"4436cc0bf41000fb48fdc555f08b8f68cf1c953c339845c5d17779987aba07c5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:34:59.449542Z","signature_b64":"UpAFKYH8BW26U8y3rxjb9YuVKvlZ9w6rcJlUyy24yk2vSjOUeP1rFhcqzevKmtx/5JHF4wnt43BoY1rIYJHOAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a63cfc9dcc0faf665b7e326eea170132323a5c56b068440ccb8191eabd4dded9","last_reissued_at":"2026-05-18T00:34:59.448724Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:34:59.448724Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Word Vector Enrichment of Low Frequency Words in the Bag-of-Words Model for Short Text Multi-class Classification Problems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Alfred Krzywicki, Bradford Heap, Michael Bain, Susanne Schmeidl, Wayne Wobcke","submitted_at":"2017-09-18T05:00:34Z","abstract_excerpt":"The bag-of-words model is a standard representation of text for many linear classifier learners. In many problem domains, linear classifiers are preferred over more complex models due to their efficiency, robustness and interpretability, and the bag-of-words text representation can capture sufficient information for linear classifiers to make highly accurate predictions. However in settings where there is a large vocabulary, large variance in the frequency of terms in the training corpus, many classes and very short text (e.g., single sentences or document titles) the bag-of-words representati"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.05778","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1709.05778","created_at":"2026-05-18T00:34:59.448862+00:00"},{"alias_kind":"arxiv_version","alias_value":"1709.05778v1","created_at":"2026-05-18T00:34:59.448862+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.05778","created_at":"2026-05-18T00:34:59.448862+00:00"},{"alias_kind":"pith_short_12","alias_value":"UY6PZHOMB6XW","created_at":"2026-05-18T12:31:49.984773+00:00"},{"alias_kind":"pith_short_16","alias_value":"UY6PZHOMB6XWMW36","created_at":"2026-05-18T12:31:49.984773+00:00"},{"alias_kind":"pith_short_8","alias_value":"UY6PZHOM","created_at":"2026-05-18T12:31:49.984773+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI","json":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI.json","graph_json":"https://pith.science/api/pith-number/UY6PZHOMB6XWMW36GJXOUFYBGI/graph.json","events_json":"https://pith.science/api/pith-number/UY6PZHOMB6XWMW36GJXOUFYBGI/events.json","paper":"https://pith.science/paper/UY6PZHOM"},"agent_actions":{"view_html":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI","download_json":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI.json","view_paper":"https://pith.science/paper/UY6PZHOM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1709.05778&json=true","fetch_graph":"https://pith.science/api/pith-number/UY6PZHOMB6XWMW36GJXOUFYBGI/graph.json","fetch_events":"https://pith.science/api/pith-number/UY6PZHOMB6XWMW36GJXOUFYBGI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI/action/storage_attestation","attest_author":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI/action/author_attestation","sign_citation":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI/action/citation_signature","submit_replication":"https://pith.science/pith/UY6PZHOMB6XWMW36GJXOUFYBGI/action/replication_record"}},"created_at":"2026-05-18T00:34:59.448862+00:00","updated_at":"2026-05-18T00:34:59.448862+00:00"}