{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2020:RWMLBZXWHU5MHFXSBLAPN4LD6Y","short_pith_number":"pith:RWMLBZXW","schema_version":"1.0","canonical_sha256":"8d98b0e6f63d3ac396f20ac0f6f163f616a519dc3bc43ef9ebf6bb5793c25945","source":{"kind":"arxiv","id":"2008.05759","version":2},"attestation_state":"computed","paper":{"title":"MICE: Mining Idioms with Contextual Embeddings","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Marko Robnik-\\v{S}ikonja, Polona Gantar, Tadej \\v{S}kvorc","submitted_at":"2020-08-13T08:56:40Z","abstract_excerpt":"Idiomatic expressions can be problematic for natural language processing applications as their meaning cannot be inferred from their constituting words. A lack of successful methodological approaches and sufficiently large datasets prevents the development of machine learning approaches for detecting idioms, especially for expressions that do not occur in the training set. We present an approach, called MICE, that uses contextual embeddings for that purpose. We present a new dataset of multi-word expressions with literal and idiomatic meanings and use it to train a classifier based on two stat"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2008.05759","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2020-08-13T08:56:40Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"def646c322a77e9f301f3ec9acd2e875a423287fd41b5302513bb02f5197fbbf","abstract_canon_sha256":"ba1126153d1ac83f115287cbcea30bd324440b2b370632f81249b3058173a462"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T03:30:37.375432Z","signature_b64":"lUXvlZuJ5dklm+dFl/+HKBt0OLFZ3N1/2X5hgbzo93b8xjE3bnX7eQ6cg2leaa5QsUjt5SJq84pXnmGxuqAUCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8d98b0e6f63d3ac396f20ac0f6f163f616a519dc3bc43ef9ebf6bb5793c25945","last_reissued_at":"2026-07-05T03:30:37.374957Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T03:30:37.374957Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MICE: Mining Idioms with Contextual Embeddings","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Marko Robnik-\\v{S}ikonja, Polona Gantar, Tadej \\v{S}kvorc","submitted_at":"2020-08-13T08:56:40Z","abstract_excerpt":"Idiomatic expressions can be problematic for natural language processing applications as their meaning cannot be inferred from their constituting words. A lack of successful methodological approaches and sufficiently large datasets prevents the development of machine learning approaches for detecting idioms, especially for expressions that do not occur in the training set. We present an approach, called MICE, that uses contextual embeddings for that purpose. We present a new dataset of multi-word expressions with literal and idiomatic meanings and use it to train a classifier based on two stat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2008.05759","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2008.05759/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2008.05759","created_at":"2026-07-05T03:30:37.375019+00:00"},{"alias_kind":"arxiv_version","alias_value":"2008.05759v2","created_at":"2026-07-05T03:30:37.375019+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2008.05759","created_at":"2026-07-05T03:30:37.375019+00:00"},{"alias_kind":"pith_short_12","alias_value":"RWMLBZXWHU5M","created_at":"2026-07-05T03:30:37.375019+00:00"},{"alias_kind":"pith_short_16","alias_value":"RWMLBZXWHU5MHFXS","created_at":"2026-07-05T03:30:37.375019+00:00"},{"alias_kind":"pith_short_8","alias_value":"RWMLBZXW","created_at":"2026-07-05T03:30:37.375019+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y","json":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y.json","graph_json":"https://pith.science/api/pith-number/RWMLBZXWHU5MHFXSBLAPN4LD6Y/graph.json","events_json":"https://pith.science/api/pith-number/RWMLBZXWHU5MHFXSBLAPN4LD6Y/events.json","paper":"https://pith.science/paper/RWMLBZXW"},"agent_actions":{"view_html":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y","download_json":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y.json","view_paper":"https://pith.science/paper/RWMLBZXW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2008.05759&json=true","fetch_graph":"https://pith.science/api/pith-number/RWMLBZXWHU5MHFXSBLAPN4LD6Y/graph.json","fetch_events":"https://pith.science/api/pith-number/RWMLBZXWHU5MHFXSBLAPN4LD6Y/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y/action/storage_attestation","attest_author":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y/action/author_attestation","sign_citation":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y/action/citation_signature","submit_replication":"https://pith.science/pith/RWMLBZXWHU5MHFXSBLAPN4LD6Y/action/replication_record"}},"created_at":"2026-07-05T03:30:37.375019+00:00","updated_at":"2026-07-05T03:30:37.375019+00:00"}