{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:XHUXQYCSUAWXWUYBZRARHHFWNX","short_pith_number":"pith:XHUXQYCS","schema_version":"1.0","canonical_sha256":"b9e9786052a02d7b5301cc41139cb66dca788a6909eef71f99fed368f37e6b7d","source":{"kind":"arxiv","id":"1703.08741","version":2},"attestation_state":"computed","paper":{"title":"Clustering and Variable Selection in the Presence of Mixed Variable Types and Missing Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ME","authors_text":"Amy Weaver, Curtis Storlie, John Port, Paul Croarkin, Robert Colligan, Robert Voigt, Ruth Stoeckel, Scott Myers, S Katusic","submitted_at":"2017-03-25T20:57:56Z","abstract_excerpt":"We consider the problem of model-based clustering in the presence of many correlated, mixed continuous and discrete variables, some of which may have missing values. Discrete variables are treated with a latent continuous variable approach and the Dirichlet process is used to construct a mixture model with an unknown number of components. Variable selection is also performed to identify the variables that are most influential for determining cluster membership. The work is motivated by the need to cluster patients thought to potentially have autism spectrum disorder (ASD) on the basis of many "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1703.08741","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2017-03-25T20:57:56Z","cross_cats_sorted":[],"title_canon_sha256":"cf2355300fe81ad06677d7ff109ff5686f78da9e180b96cca89f5dd3d9ba54bc","abstract_canon_sha256":"90f9f532cad15d03b6baf50981459e2144b27e5493e7dd525bdcfc1bb8d5b4d5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:21:35.163153Z","signature_b64":"9JjL1iBIPau53uOUrzw/yV0xX9QFHOVj1nEuwHhklru3GAhquNQAuCc79A6ELAh+bGy2kvtUpjzLoROfhqG+AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b9e9786052a02d7b5301cc41139cb66dca788a6909eef71f99fed368f37e6b7d","last_reissued_at":"2026-05-18T00:21:35.162526Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:21:35.162526Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Clustering and Variable Selection in the Presence of Mixed Variable Types and Missing Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ME","authors_text":"Amy Weaver, Curtis Storlie, John Port, Paul Croarkin, Robert Colligan, Robert Voigt, Ruth Stoeckel, Scott Myers, S Katusic","submitted_at":"2017-03-25T20:57:56Z","abstract_excerpt":"We consider the problem of model-based clustering in the presence of many correlated, mixed continuous and discrete variables, some of which may have missing values. Discrete variables are treated with a latent continuous variable approach and the Dirichlet process is used to construct a mixture model with an unknown number of components. Variable selection is also performed to identify the variables that are most influential for determining cluster membership. The work is motivated by the need to cluster patients thought to potentially have autism spectrum disorder (ASD) on the basis of many "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.08741","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1703.08741","created_at":"2026-05-18T00:21:35.162620+00:00"},{"alias_kind":"arxiv_version","alias_value":"1703.08741v2","created_at":"2026-05-18T00:21:35.162620+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.08741","created_at":"2026-05-18T00:21:35.162620+00:00"},{"alias_kind":"pith_short_12","alias_value":"XHUXQYCSUAWX","created_at":"2026-05-18T12:31:53.515858+00:00"},{"alias_kind":"pith_short_16","alias_value":"XHUXQYCSUAWXWUYB","created_at":"2026-05-18T12:31:53.515858+00:00"},{"alias_kind":"pith_short_8","alias_value":"XHUXQYCS","created_at":"2026-05-18T12:31:53.515858+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX","json":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX.json","graph_json":"https://pith.science/api/pith-number/XHUXQYCSUAWXWUYBZRARHHFWNX/graph.json","events_json":"https://pith.science/api/pith-number/XHUXQYCSUAWXWUYBZRARHHFWNX/events.json","paper":"https://pith.science/paper/XHUXQYCS"},"agent_actions":{"view_html":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX","download_json":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX.json","view_paper":"https://pith.science/paper/XHUXQYCS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1703.08741&json=true","fetch_graph":"https://pith.science/api/pith-number/XHUXQYCSUAWXWUYBZRARHHFWNX/graph.json","fetch_events":"https://pith.science/api/pith-number/XHUXQYCSUAWXWUYBZRARHHFWNX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX/action/storage_attestation","attest_author":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX/action/author_attestation","sign_citation":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX/action/citation_signature","submit_replication":"https://pith.science/pith/XHUXQYCSUAWXWUYBZRARHHFWNX/action/replication_record"}},"created_at":"2026-05-18T00:21:35.162620+00:00","updated_at":"2026-05-18T00:21:35.162620+00:00"}