{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:TEQJDJBPTWJVHGFUCTHSFI22SD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b8df6a7604b8835ca650b615f9235527db71ac3e96a072eee66867a5b8b036d8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-07-31T16:32:50Z","title_canon_sha256":"4e2a00796a2d577f3afe718b69e00b691a3e7b1da0e625ecb6d9067696a7ca37"},"schema_version":"1.0","source":{"id":"1807.11906","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.11906","created_at":"2026-05-18T00:09:03Z"},{"alias_kind":"arxiv_version","alias_value":"1807.11906v2","created_at":"2026-05-18T00:09:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.11906","created_at":"2026-05-18T00:09:03Z"},{"alias_kind":"pith_short_12","alias_value":"TEQJDJBPTWJV","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"TEQJDJBPTWJVHGFU","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"TEQJDJBP","created_at":"2026-05-18T12:32:53Z"}],"graph_snapshots":[{"event_id":"sha256:622123f5ff6fe33a1d50fd752fcac39821a80d64e0915909ee90ea980d3d54f1","target":"graph","created_at":"2026-05-18T00:09:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper presents an effective approach for parallel corpus mining using bilingual sentence embeddings. Our embedding models are trained to produce similar representations exclusively for bilingual sentence pairs that are translations of each other. This is achieved using a novel training method that introduces hard negatives consisting of sentences that are not translations but that have some degree of semantic similarity. The quality of the resulting embeddings are evaluated on parallel corpus reconstruction and by assessing machine translation systems trained on gold vs. mined sentence pa","authors_text":"Brian Strope, Daniel Cer, Gustavo Hernandez Abrego, Heming Ge, Keith Stevens, Mandy Guo, Noah Constant, Qinlan Shen, Ray Kurzweil, Yinfei Yang, Yun-hsuan Sung","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-07-31T16:32:50Z","title":"Effective Parallel Corpus Mining using Bilingual Sentence Embeddings"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.11906","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:62094961a61307a299c88949d498e8a0c87d3db11fbf099ef4548da34207abab","target":"record","created_at":"2026-05-18T00:09:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b8df6a7604b8835ca650b615f9235527db71ac3e96a072eee66867a5b8b036d8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-07-31T16:32:50Z","title_canon_sha256":"4e2a00796a2d577f3afe718b69e00b691a3e7b1da0e625ecb6d9067696a7ca37"},"schema_version":"1.0","source":{"id":"1807.11906","kind":"arxiv","version":2}},"canonical_sha256":"992091a42f9d935398b414cf22a35a90d21ad7861e09dccbaabc4f4500b0ac1f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"992091a42f9d935398b414cf22a35a90d21ad7861e09dccbaabc4f4500b0ac1f","first_computed_at":"2026-05-18T00:09:03.650074Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:09:03.650074Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"M6CQ9lfEe5OvuWfCFFjYks84KtKE6xe3GjeXSBCEl0r0BdcrVCMO1bbveyd/7COCmOhsYl6NFlHgLQbthe5IBw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:09:03.650619Z","signed_message":"canonical_sha256_bytes"},"source_id":"1807.11906","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:62094961a61307a299c88949d498e8a0c87d3db11fbf099ef4548da34207abab","sha256:622123f5ff6fe33a1d50fd752fcac39821a80d64e0915909ee90ea980d3d54f1"],"state_sha256":"239d34e1d82fe911871cd44f7a33e8f5756dae888dc3a131f8e24778e4199ede"}