{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:7UYPZP6HTLDGJ4ZKOJU3C7HN7H","short_pith_number":"pith:7UYPZP6H","schema_version":"1.0","canonical_sha256":"fd30fcbfc79ac664f32a7269b17cedf9fb2b0eaae56cc0c7ad9447f35da16fd4","source":{"kind":"arxiv","id":"1410.4792","version":1},"attestation_state":"computed","paper":{"title":"Variational Bayes for Merging Noisy Databases","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"stat.ME","authors_text":"Rebecca C. Steorts, Tamara Broderick","submitted_at":"2014-10-17T16:46:45Z","abstract_excerpt":"Bayesian entity resolution merges together multiple, noisy databases and returns the minimal collection of unique individuals represented, together with their true, latent record values. Bayesian methods allow flexible generative models that share power across databases as well as principled quantification of uncertainty for queries of the final, resolved database. However, existing Bayesian methods for entity resolution use Markov monte Carlo method (MCMC) approximations and are too slow to run on modern databases containing millions or billions of records. Instead, we propose applying variat"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1410.4792","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2014-10-17T16:46:45Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"3b8c97fce321bbfb3a819168af3f22fa798ed191fcb9f498dbc149a235f2e8d4","abstract_canon_sha256":"5ed63dedbbed5fca586abe89b33e6d22e97d4e59e22eb32d28279914f18be09f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:39:54.722823Z","signature_b64":"sp54LFtbYbOrfVC0TmiQdjZ85l+FCB3Sq7ty6rW+3ddCVf6lUuQ+JgNRlZ6ttqcTTeOwoDKA+Pd9AJ0Ir1jXDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fd30fcbfc79ac664f32a7269b17cedf9fb2b0eaae56cc0c7ad9447f35da16fd4","last_reissued_at":"2026-05-18T02:39:54.722221Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:39:54.722221Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Variational Bayes for Merging Noisy Databases","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"stat.ME","authors_text":"Rebecca C. Steorts, Tamara Broderick","submitted_at":"2014-10-17T16:46:45Z","abstract_excerpt":"Bayesian entity resolution merges together multiple, noisy databases and returns the minimal collection of unique individuals represented, together with their true, latent record values. Bayesian methods allow flexible generative models that share power across databases as well as principled quantification of uncertainty for queries of the final, resolved database. However, existing Bayesian methods for entity resolution use Markov monte Carlo method (MCMC) approximations and are too slow to run on modern databases containing millions or billions of records. Instead, we propose applying variat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1410.4792","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1410.4792","created_at":"2026-05-18T02:39:54.722301+00:00"},{"alias_kind":"arxiv_version","alias_value":"1410.4792v1","created_at":"2026-05-18T02:39:54.722301+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1410.4792","created_at":"2026-05-18T02:39:54.722301+00:00"},{"alias_kind":"pith_short_12","alias_value":"7UYPZP6HTLDG","created_at":"2026-05-18T12:28:19.803747+00:00"},{"alias_kind":"pith_short_16","alias_value":"7UYPZP6HTLDGJ4ZK","created_at":"2026-05-18T12:28:19.803747+00:00"},{"alias_kind":"pith_short_8","alias_value":"7UYPZP6H","created_at":"2026-05-18T12:28:19.803747+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H","json":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H.json","graph_json":"https://pith.science/api/pith-number/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/graph.json","events_json":"https://pith.science/api/pith-number/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/events.json","paper":"https://pith.science/paper/7UYPZP6H"},"agent_actions":{"view_html":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H","download_json":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H.json","view_paper":"https://pith.science/paper/7UYPZP6H","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1410.4792&json=true","fetch_graph":"https://pith.science/api/pith-number/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/graph.json","fetch_events":"https://pith.science/api/pith-number/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/action/storage_attestation","attest_author":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/action/author_attestation","sign_citation":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/action/citation_signature","submit_replication":"https://pith.science/pith/7UYPZP6HTLDGJ4ZKOJU3C7HN7H/action/replication_record"}},"created_at":"2026-05-18T02:39:54.722301+00:00","updated_at":"2026-05-18T02:39:54.722301+00:00"}