{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:PFQYPY7FR537HU6OHEFB23Y7SF","short_pith_number":"pith:PFQYPY7F","schema_version":"1.0","canonical_sha256":"796187e3e58f77f3d3ce390a1d6f1f9177facccae3157341a73cf46b17df67ab","source":{"kind":"arxiv","id":"2511.20409","version":2},"attestation_state":"computed","paper":{"title":"NormEval: A Unified Multi-Metric Framework for Evaluating Semantic Fidelity in Text Normalization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Md Abdullah Al Kafi, Raka Moni, Walayat Hussain","submitted_at":"2025-11-25T15:35:42Z","abstract_excerpt":"Text normalization methods such as stemming and lemmatization are fundamental components of NLP pipelines. As new normalization tools are developed for diverse languages, evaluation methodologies remain fragmented, relying on Compression Ratio, downstream accuracy, or sequence-to-sequence prediction scores in isolation, failing to distinguish between beneficial vocabulary reduction and harmful semantic distortion. Moreover, text normalization underpins intelligent systems in high-stakes domains, including clinical decision support and legal document analysis, and principled evaluation methodol"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2511.20409","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-11-25T15:35:42Z","cross_cats_sorted":[],"title_canon_sha256":"163258237e38b8c6cdca73a0c189216412aefd177b0a193666fa17749b965944","abstract_canon_sha256":"9c2d4496f5003b68417ccb50f091f520d6070d71c4ae735a8218f40182ea0d5c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:15.403379Z","signature_b64":"0WVn5wGcEr+QLNorS7IZ15HqMYxpDbAqJ6Y4lB3xbQAVtkop4QmGqbDCVPYTa3ig2Z8J0r+JZoUvlT4rEvyBCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"796187e3e58f77f3d3ce390a1d6f1f9177facccae3157341a73cf46b17df67ab","last_reissued_at":"2026-06-02T01:03:15.402922Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:15.402922Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"NormEval: A Unified Multi-Metric Framework for Evaluating Semantic Fidelity in Text Normalization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Md Abdullah Al Kafi, Raka Moni, Walayat Hussain","submitted_at":"2025-11-25T15:35:42Z","abstract_excerpt":"Text normalization methods such as stemming and lemmatization are fundamental components of NLP pipelines. As new normalization tools are developed for diverse languages, evaluation methodologies remain fragmented, relying on Compression Ratio, downstream accuracy, or sequence-to-sequence prediction scores in isolation, failing to distinguish between beneficial vocabulary reduction and harmful semantic distortion. Moreover, text normalization underpins intelligent systems in high-stakes domains, including clinical decision support and legal document analysis, and principled evaluation methodol"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2511.20409","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2511.20409/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.20409","created_at":"2026-06-02T01:03:15.402979+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.20409v2","created_at":"2026-06-02T01:03:15.402979+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.20409","created_at":"2026-06-02T01:03:15.402979+00:00"},{"alias_kind":"pith_short_12","alias_value":"PFQYPY7FR537","created_at":"2026-06-02T01:03:15.402979+00:00"},{"alias_kind":"pith_short_16","alias_value":"PFQYPY7FR537HU6O","created_at":"2026-06-02T01:03:15.402979+00:00"},{"alias_kind":"pith_short_8","alias_value":"PFQYPY7F","created_at":"2026-06-02T01:03:15.402979+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF","json":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF.json","graph_json":"https://pith.science/api/pith-number/PFQYPY7FR537HU6OHEFB23Y7SF/graph.json","events_json":"https://pith.science/api/pith-number/PFQYPY7FR537HU6OHEFB23Y7SF/events.json","paper":"https://pith.science/paper/PFQYPY7F"},"agent_actions":{"view_html":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF","download_json":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF.json","view_paper":"https://pith.science/paper/PFQYPY7F","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.20409&json=true","fetch_graph":"https://pith.science/api/pith-number/PFQYPY7FR537HU6OHEFB23Y7SF/graph.json","fetch_events":"https://pith.science/api/pith-number/PFQYPY7FR537HU6OHEFB23Y7SF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF/action/storage_attestation","attest_author":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF/action/author_attestation","sign_citation":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF/action/citation_signature","submit_replication":"https://pith.science/pith/PFQYPY7FR537HU6OHEFB23Y7SF/action/replication_record"}},"created_at":"2026-06-02T01:03:15.402979+00:00","updated_at":"2026-06-02T01:03:15.402979+00:00"}