{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:YKG2GZMKBPHKDGREQLC6LKGGYS","short_pith_number":"pith:YKG2GZMK","schema_version":"1.0","canonical_sha256":"c28da3658a0bcea19a2482c5e5a8c6c4abcafd2f0f01041c209d0bcfcdb921b2","source":{"kind":"arxiv","id":"1903.00458","version":1},"attestation_state":"computed","paper":{"title":"How to Hallucinate Functional Proteins","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"q-bio.QM","authors_text":"Hector Garcia Martin, Zak Costello","submitted_at":"2019-03-01T18:39:00Z","abstract_excerpt":"Here we present a novel approach to protein design and phenotypic inference using a generative model for protein sequences. BioSeqVAE, a variational autoencoder variant, can hallucinate syntactically valid protein sequences that are likely to fold and function. BioSeqVAE is trained on the entire known protein sequence space and learns to generate valid examples of protein sequences in an unsupervised manner. The model is validated by showing that its latent feature space is useful and that it accurately reconstructs sequences. Its usefulness is demonstrated with a selection of relevant downstr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.00458","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"q-bio.QM","submitted_at":"2019-03-01T18:39:00Z","cross_cats_sorted":[],"title_canon_sha256":"2d2e3a16040351971e6c4f2b147e350bd6d97c3750e07f6dbe9b71d8f3dc0950","abstract_canon_sha256":"b450e0221d401098e64e7abea0c12746d140721ad161b7a6f8cf564769c3daaa"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:19.604377Z","signature_b64":"Sex/EXiLM5raX95kZmJpXvU9KXLVkhv62eEI+XcpBLk7/SNSr+9yWUCqsuI4xxszefG5VxbcWJdcjyWfkSeMBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c28da3658a0bcea19a2482c5e5a8c6c4abcafd2f0f01041c209d0bcfcdb921b2","last_reissued_at":"2026-05-17T23:52:19.603791Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:19.603791Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"How to Hallucinate Functional Proteins","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"q-bio.QM","authors_text":"Hector Garcia Martin, Zak Costello","submitted_at":"2019-03-01T18:39:00Z","abstract_excerpt":"Here we present a novel approach to protein design and phenotypic inference using a generative model for protein sequences. BioSeqVAE, a variational autoencoder variant, can hallucinate syntactically valid protein sequences that are likely to fold and function. BioSeqVAE is trained on the entire known protein sequence space and learns to generate valid examples of protein sequences in an unsupervised manner. The model is validated by showing that its latent feature space is useful and that it accurately reconstructs sequences. Its usefulness is demonstrated with a selection of relevant downstr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.00458","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.00458","created_at":"2026-05-17T23:52:19.603889+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.00458v1","created_at":"2026-05-17T23:52:19.603889+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.00458","created_at":"2026-05-17T23:52:19.603889+00:00"},{"alias_kind":"pith_short_12","alias_value":"YKG2GZMKBPHK","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"YKG2GZMKBPHKDGRE","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"YKG2GZMK","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2606.11651","citing_title":"DeepRHP: A Hybrid Variational Autoencoder for Designing Random Heteropolymers as Protein Mimics","ref_index":9,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS","json":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS.json","graph_json":"https://pith.science/api/pith-number/YKG2GZMKBPHKDGREQLC6LKGGYS/graph.json","events_json":"https://pith.science/api/pith-number/YKG2GZMKBPHKDGREQLC6LKGGYS/events.json","paper":"https://pith.science/paper/YKG2GZMK"},"agent_actions":{"view_html":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS","download_json":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS.json","view_paper":"https://pith.science/paper/YKG2GZMK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.00458&json=true","fetch_graph":"https://pith.science/api/pith-number/YKG2GZMKBPHKDGREQLC6LKGGYS/graph.json","fetch_events":"https://pith.science/api/pith-number/YKG2GZMKBPHKDGREQLC6LKGGYS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS/action/storage_attestation","attest_author":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS/action/author_attestation","sign_citation":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS/action/citation_signature","submit_replication":"https://pith.science/pith/YKG2GZMKBPHKDGREQLC6LKGGYS/action/replication_record"}},"created_at":"2026-05-17T23:52:19.603889+00:00","updated_at":"2026-05-17T23:52:19.603889+00:00"}