{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:AOZLBUORYU23K7JELTQP3AAGO6","short_pith_number":"pith:AOZLBUOR","schema_version":"1.0","canonical_sha256":"03b2b0d1d1c535b57d245ce0fd800677b66bc47ea5245f8929414f59e07903cc","source":{"kind":"arxiv","id":"1701.01094","version":1},"attestation_state":"computed","paper":{"title":"Minimally-Supervised Attribute Fusion for Data Lakes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Garima Gupta, Gautam Shroff, Karamjit Singh, Puneet Agarwal","submitted_at":"2017-01-04T18:19:19Z","abstract_excerpt":"Aggregate analysis, such as comparing country-wise sales versus global market share across product categories, is often complicated by the unavailability of common join attributes, e.g., category, across diverse datasets from different geographies or retail chains, even after disparate data is technically ingested into a common data lake. Sometimes this is a missing data issue, while in other cases it may be inherent, e.g., the records in different geographical databases may actually describe different product 'SKUs', or follow different norms for categorization. Record linkage techniques can "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1701.01094","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-01-04T18:19:19Z","cross_cats_sorted":[],"title_canon_sha256":"b8faafdf02c9be80972320e24daf8da616e50614d28c442aecf17bfca0822bda","abstract_canon_sha256":"34d3b9b60c4e9e928e014cec7f7df60e9e773115e91ddabd2a74bf5f7a6ecc20"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:53:21.483636Z","signature_b64":"WYfpcEVb944DDYpfbiK+WfdU0/lSBn/FKF023iVJjsGkxt8i0pHpOjw/XcHvMLBNhlCmX24q4Vv2PiCJrxZbDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"03b2b0d1d1c535b57d245ce0fd800677b66bc47ea5245f8929414f59e07903cc","last_reissued_at":"2026-05-18T00:53:21.483093Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:53:21.483093Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Minimally-Supervised Attribute Fusion for Data Lakes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Garima Gupta, Gautam Shroff, Karamjit Singh, Puneet Agarwal","submitted_at":"2017-01-04T18:19:19Z","abstract_excerpt":"Aggregate analysis, such as comparing country-wise sales versus global market share across product categories, is often complicated by the unavailability of common join attributes, e.g., category, across diverse datasets from different geographies or retail chains, even after disparate data is technically ingested into a common data lake. Sometimes this is a missing data issue, while in other cases it may be inherent, e.g., the records in different geographical databases may actually describe different product 'SKUs', or follow different norms for categorization. Record linkage techniques can "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1701.01094","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1701.01094","created_at":"2026-05-18T00:53:21.483169+00:00"},{"alias_kind":"arxiv_version","alias_value":"1701.01094v1","created_at":"2026-05-18T00:53:21.483169+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1701.01094","created_at":"2026-05-18T00:53:21.483169+00:00"},{"alias_kind":"pith_short_12","alias_value":"AOZLBUORYU23","created_at":"2026-05-18T12:31:05.417338+00:00"},{"alias_kind":"pith_short_16","alias_value":"AOZLBUORYU23K7JE","created_at":"2026-05-18T12:31:05.417338+00:00"},{"alias_kind":"pith_short_8","alias_value":"AOZLBUOR","created_at":"2026-05-18T12:31:05.417338+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6","json":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6.json","graph_json":"https://pith.science/api/pith-number/AOZLBUORYU23K7JELTQP3AAGO6/graph.json","events_json":"https://pith.science/api/pith-number/AOZLBUORYU23K7JELTQP3AAGO6/events.json","paper":"https://pith.science/paper/AOZLBUOR"},"agent_actions":{"view_html":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6","download_json":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6.json","view_paper":"https://pith.science/paper/AOZLBUOR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1701.01094&json=true","fetch_graph":"https://pith.science/api/pith-number/AOZLBUORYU23K7JELTQP3AAGO6/graph.json","fetch_events":"https://pith.science/api/pith-number/AOZLBUORYU23K7JELTQP3AAGO6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6/action/storage_attestation","attest_author":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6/action/author_attestation","sign_citation":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6/action/citation_signature","submit_replication":"https://pith.science/pith/AOZLBUORYU23K7JELTQP3AAGO6/action/replication_record"}},"created_at":"2026-05-18T00:53:21.483169+00:00","updated_at":"2026-05-18T00:53:21.483169+00:00"}