{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:PX3GJGCNYWWKD2FUUNY3EGCXPZ","short_pith_number":"pith:PX3GJGCN","schema_version":"1.0","canonical_sha256":"7df664984dc5aca1e8b4a371b218577e45742206168f310e85baf0e27daaa1ea","source":{"kind":"arxiv","id":"1905.05981","version":1},"attestation_state":"computed","paper":{"title":"Improving Distributed Similarity Join in Metric Space with Error-bounded Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Chunbin Lin, Chunxiao Xing, Jiacheng Wu, Jin Wang, Yingjia Fu, Yong Zhang","submitted_at":"2019-05-15T07:07:28Z","abstract_excerpt":"Given two sets of objects, metric similarity join finds all similar pairs of objects according to a particular distance function in metric space. There is an increasing demand to provide a scalable similarity join framework which can support efficient query and analytical services in the era of Big Data. The existing distributed metric similarity join algorithms adopt random sampling techniques to produce pivots and utilize holistic partitioning methods based on the generated pivots to partition data, which results in data skew problem since both the generated pivots and the partition strategi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1905.05981","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2019-05-15T07:07:28Z","cross_cats_sorted":[],"title_canon_sha256":"1251f5c0bbc7f6952eda8fdcccfc56b1712418edf8fd38da0f5b9595170293bb","abstract_canon_sha256":"2a402abce5d5d5d4cd80fd8c5306f69601fb892c0bf6219e5aa77bbe45385542"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:46:07.884940Z","signature_b64":"jWOyFSDf6cas/vpA8NyvyMSvimqWYWLeuG6yt6FnrcpIA3xb8I3cXsxPfDVDP+F/Zsnk8yBVculdJcZy5+/ICA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7df664984dc5aca1e8b4a371b218577e45742206168f310e85baf0e27daaa1ea","last_reissued_at":"2026-05-17T23:46:07.884394Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:46:07.884394Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Improving Distributed Similarity Join in Metric Space with Error-bounded Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Chunbin Lin, Chunxiao Xing, Jiacheng Wu, Jin Wang, Yingjia Fu, Yong Zhang","submitted_at":"2019-05-15T07:07:28Z","abstract_excerpt":"Given two sets of objects, metric similarity join finds all similar pairs of objects according to a particular distance function in metric space. There is an increasing demand to provide a scalable similarity join framework which can support efficient query and analytical services in the era of Big Data. The existing distributed metric similarity join algorithms adopt random sampling techniques to produce pivots and utilize holistic partitioning methods based on the generated pivots to partition data, which results in data skew problem since both the generated pivots and the partition strategi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.05981","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1905.05981","created_at":"2026-05-17T23:46:07.884492+00:00"},{"alias_kind":"arxiv_version","alias_value":"1905.05981v1","created_at":"2026-05-17T23:46:07.884492+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.05981","created_at":"2026-05-17T23:46:07.884492+00:00"},{"alias_kind":"pith_short_12","alias_value":"PX3GJGCNYWWK","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_16","alias_value":"PX3GJGCNYWWKD2FU","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_8","alias_value":"PX3GJGCN","created_at":"2026-05-18T12:33:24.271573+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ","json":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ.json","graph_json":"https://pith.science/api/pith-number/PX3GJGCNYWWKD2FUUNY3EGCXPZ/graph.json","events_json":"https://pith.science/api/pith-number/PX3GJGCNYWWKD2FUUNY3EGCXPZ/events.json","paper":"https://pith.science/paper/PX3GJGCN"},"agent_actions":{"view_html":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ","download_json":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ.json","view_paper":"https://pith.science/paper/PX3GJGCN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1905.05981&json=true","fetch_graph":"https://pith.science/api/pith-number/PX3GJGCNYWWKD2FUUNY3EGCXPZ/graph.json","fetch_events":"https://pith.science/api/pith-number/PX3GJGCNYWWKD2FUUNY3EGCXPZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ/action/storage_attestation","attest_author":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ/action/author_attestation","sign_citation":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ/action/citation_signature","submit_replication":"https://pith.science/pith/PX3GJGCNYWWKD2FUUNY3EGCXPZ/action/replication_record"}},"created_at":"2026-05-17T23:46:07.884492+00:00","updated_at":"2026-05-17T23:46:07.884492+00:00"}