{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:MDLLV23O2KFE7SJVAWAV4BTIE2","short_pith_number":"pith:MDLLV23O","canonical_record":{"source":{"id":"1704.08960","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-04-28T14:46:25Z","cross_cats_sorted":[],"title_canon_sha256":"3b4a08f38c4a203916bb6fad765ad99b0665e09aed356cb332d87094fe5c3fb3","abstract_canon_sha256":"b581a918a1c11d4926852fcb1e986e67cbee0ca4647a51ffe3ca77b0cf25e7c5"},"schema_version":"1.0"},"canonical_sha256":"60d6baeb6ed28a4fc93505815e066826b87e1ec9509a566a7ef02bfab1b403ae","source":{"kind":"arxiv","id":"1704.08960","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1704.08960","created_at":"2026-05-18T00:45:24Z"},{"alias_kind":"arxiv_version","alias_value":"1704.08960v1","created_at":"2026-05-18T00:45:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.08960","created_at":"2026-05-18T00:45:24Z"},{"alias_kind":"pith_short_12","alias_value":"MDLLV23O2KFE","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"MDLLV23O2KFE7SJV","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"MDLLV23O","created_at":"2026-05-18T12:31:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:MDLLV23O2KFE7SJVAWAV4BTIE2","target":"record","payload":{"canonical_record":{"source":{"id":"1704.08960","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-04-28T14:46:25Z","cross_cats_sorted":[],"title_canon_sha256":"3b4a08f38c4a203916bb6fad765ad99b0665e09aed356cb332d87094fe5c3fb3","abstract_canon_sha256":"b581a918a1c11d4926852fcb1e986e67cbee0ca4647a51ffe3ca77b0cf25e7c5"},"schema_version":"1.0"},"canonical_sha256":"60d6baeb6ed28a4fc93505815e066826b87e1ec9509a566a7ef02bfab1b403ae","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:45:24.803152Z","signature_b64":"ye+mVOG5cwB7jJxnXXzAHUiVezVUZCCO0Gvo+QMWvxFpjMfEBpALioUdxnyB1iR3/KfW8Si02TP/y36tlLMyCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"60d6baeb6ed28a4fc93505815e066826b87e1ec9509a566a7ef02bfab1b403ae","last_reissued_at":"2026-05-18T00:45:24.802533Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:45:24.802533Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1704.08960","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:45:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mcf9JBSlY+d5ospsL5xImWAivdCxX53c+sAZSQAWtpSrrPC9zA35//18owr6bcaTwYPDt6McRruSEppgBB6vDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T07:47:23.641540Z"},"content_sha256":"80e14bddf192d891cb1ff26c5a1ddb4539fc32190cecf327bf4bf6cf87431792","schema_version":"1.0","event_id":"sha256:80e14bddf192d891cb1ff26c5a1ddb4539fc32190cecf327bf4bf6cf87431792"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:MDLLV23O2KFE7SJVAWAV4BTIE2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Neural Word Segmentation with Rich Pretraining","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Fei Dong, Jie Yang, Yue Zhang","submitted_at":"2017-04-28T14:46:25Z","abstract_excerpt":"Neural word segmentation research has benefited from large-scale raw texts by leveraging them for pretraining character and word embeddings. On the other hand, statistical segmentation research has exploited richer sources of external information, such as punctuation, automatic segmentation and POS. We investigate the effectiveness of a range of external training sources for neural word segmentation by building a modular segmentation model, pretraining the most important submodule using rich external sources. Results show that such pretraining significantly improves the model, leading to accur"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.08960","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:45:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"06G0/GBx85Rzf4gOkt+LeKKDbEjIOdvEJf0YpmO13BI7xavlwoyQ+jW/LGmCLnmCMhsWDprZt9LgW0uEykBgAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T07:47:23.641896Z"},"content_sha256":"e14383efa8392023fe2eb3b636b4f09e2d4ef3b68dcaa9e1480132fac6e7efef","schema_version":"1.0","event_id":"sha256:e14383efa8392023fe2eb3b636b4f09e2d4ef3b68dcaa9e1480132fac6e7efef"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MDLLV23O2KFE7SJVAWAV4BTIE2/bundle.json","state_url":"https://pith.science/pith/MDLLV23O2KFE7SJVAWAV4BTIE2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MDLLV23O2KFE7SJVAWAV4BTIE2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T07:47:23Z","links":{"resolver":"https://pith.science/pith/MDLLV23O2KFE7SJVAWAV4BTIE2","bundle":"https://pith.science/pith/MDLLV23O2KFE7SJVAWAV4BTIE2/bundle.json","state":"https://pith.science/pith/MDLLV23O2KFE7SJVAWAV4BTIE2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MDLLV23O2KFE7SJVAWAV4BTIE2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:MDLLV23O2KFE7SJVAWAV4BTIE2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b581a918a1c11d4926852fcb1e986e67cbee0ca4647a51ffe3ca77b0cf25e7c5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-04-28T14:46:25Z","title_canon_sha256":"3b4a08f38c4a203916bb6fad765ad99b0665e09aed356cb332d87094fe5c3fb3"},"schema_version":"1.0","source":{"id":"1704.08960","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1704.08960","created_at":"2026-05-18T00:45:24Z"},{"alias_kind":"arxiv_version","alias_value":"1704.08960v1","created_at":"2026-05-18T00:45:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.08960","created_at":"2026-05-18T00:45:24Z"},{"alias_kind":"pith_short_12","alias_value":"MDLLV23O2KFE","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"MDLLV23O2KFE7SJV","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"MDLLV23O","created_at":"2026-05-18T12:31:31Z"}],"graph_snapshots":[{"event_id":"sha256:e14383efa8392023fe2eb3b636b4f09e2d4ef3b68dcaa9e1480132fac6e7efef","target":"graph","created_at":"2026-05-18T00:45:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Neural word segmentation research has benefited from large-scale raw texts by leveraging them for pretraining character and word embeddings. On the other hand, statistical segmentation research has exploited richer sources of external information, such as punctuation, automatic segmentation and POS. We investigate the effectiveness of a range of external training sources for neural word segmentation by building a modular segmentation model, pretraining the most important submodule using rich external sources. Results show that such pretraining significantly improves the model, leading to accur","authors_text":"Fei Dong, Jie Yang, Yue Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-04-28T14:46:25Z","title":"Neural Word Segmentation with Rich Pretraining"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.08960","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:80e14bddf192d891cb1ff26c5a1ddb4539fc32190cecf327bf4bf6cf87431792","target":"record","created_at":"2026-05-18T00:45:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b581a918a1c11d4926852fcb1e986e67cbee0ca4647a51ffe3ca77b0cf25e7c5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-04-28T14:46:25Z","title_canon_sha256":"3b4a08f38c4a203916bb6fad765ad99b0665e09aed356cb332d87094fe5c3fb3"},"schema_version":"1.0","source":{"id":"1704.08960","kind":"arxiv","version":1}},"canonical_sha256":"60d6baeb6ed28a4fc93505815e066826b87e1ec9509a566a7ef02bfab1b403ae","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"60d6baeb6ed28a4fc93505815e066826b87e1ec9509a566a7ef02bfab1b403ae","first_computed_at":"2026-05-18T00:45:24.802533Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:45:24.802533Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ye+mVOG5cwB7jJxnXXzAHUiVezVUZCCO0Gvo+QMWvxFpjMfEBpALioUdxnyB1iR3/KfW8Si02TP/y36tlLMyCw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:45:24.803152Z","signed_message":"canonical_sha256_bytes"},"source_id":"1704.08960","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:80e14bddf192d891cb1ff26c5a1ddb4539fc32190cecf327bf4bf6cf87431792","sha256:e14383efa8392023fe2eb3b636b4f09e2d4ef3b68dcaa9e1480132fac6e7efef"],"state_sha256":"caf4a202a89bc9e69fdd3e10e094f3bcd35f7f622a892fbed21158d8240d4935"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4wEUKkuDbCF0CCSUWXD2bgwsGaFyW2xaQ8Ut32FMPWIJqM2fbAxUcSdfIFBft7KmJ57UO31X9oNvVpQZZ+0WCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T07:47:23.644020Z","bundle_sha256":"7d84d5a9211cb61dd0a68ff1485a5cd675212277fceca4ea4c190c76e5100d76"}}