{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2012:INPJQFJLGHXKQKTXE7TGO3FJRQ","short_pith_number":"pith:INPJQFJL","schema_version":"1.0","canonical_sha256":"435e98152b31eea82a7727e6676ca98c222f0555b1f93f9c15465f0a4af1406f","source":{"kind":"arxiv","id":"1211.6664","version":1},"attestation_state":"computed","paper":{"title":"Compression of structured high-throughput sequencing data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB","q-bio.GN"],"primary_cat":"q-bio.QM","authors_text":"Fabien Campagne, James T. Robinson, Jill P. Mesirov, Kevin C. Dorff, Nyasha Chambwe, Thomas D. Wu","submitted_at":"2012-11-28T17:11:54Z","abstract_excerpt":"Large biological datasets are being produced at a rapid pace and create substantial storage challenges, particularly in the domain of high-throughput sequencing (HTS). Most approaches currently used to store HTS data are either unable to quickly adapt to the requirements of new sequencing or analysis methods (because they do not support schema evolution), or fail to provide state of the art compression of the datasets. We have devised new approaches to store HTS data that support seamless data schema evolution and compress datasets substantially better than existing approaches. Building on the"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1211.6664","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"q-bio.QM","submitted_at":"2012-11-28T17:11:54Z","cross_cats_sorted":["cs.DB","q-bio.GN"],"title_canon_sha256":"3dfd115f0d9b4cbed62bf56797b0a492cdc3e5d8ef85bae63d74bc4bfc2e2a4d","abstract_canon_sha256":"2290dc7026816bd6642ac58313740ccf7f953081ba923eb512e0b977a0357895"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:57:15.047371Z","signature_b64":"sgNl2MTvgQimg0msmnt9KixCgqEYAmLRuifmdOwxlTe6+QCws8JIHnaEsm6ttIYxhM1cX3nEtzFV1ziwNiA5DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"435e98152b31eea82a7727e6676ca98c222f0555b1f93f9c15465f0a4af1406f","last_reissued_at":"2026-05-18T02:57:15.046741Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:57:15.046741Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Compression of structured high-throughput sequencing data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB","q-bio.GN"],"primary_cat":"q-bio.QM","authors_text":"Fabien Campagne, James T. Robinson, Jill P. Mesirov, Kevin C. Dorff, Nyasha Chambwe, Thomas D. Wu","submitted_at":"2012-11-28T17:11:54Z","abstract_excerpt":"Large biological datasets are being produced at a rapid pace and create substantial storage challenges, particularly in the domain of high-throughput sequencing (HTS). Most approaches currently used to store HTS data are either unable to quickly adapt to the requirements of new sequencing or analysis methods (because they do not support schema evolution), or fail to provide state of the art compression of the datasets. We have devised new approaches to store HTS data that support seamless data schema evolution and compress datasets substantially better than existing approaches. Building on the"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1211.6664","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1211.6664","created_at":"2026-05-18T02:57:15.046831+00:00"},{"alias_kind":"arxiv_version","alias_value":"1211.6664v1","created_at":"2026-05-18T02:57:15.046831+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1211.6664","created_at":"2026-05-18T02:57:15.046831+00:00"},{"alias_kind":"pith_short_12","alias_value":"INPJQFJLGHXK","created_at":"2026-05-18T12:27:09.501522+00:00"},{"alias_kind":"pith_short_16","alias_value":"INPJQFJLGHXKQKTX","created_at":"2026-05-18T12:27:09.501522+00:00"},{"alias_kind":"pith_short_8","alias_value":"INPJQFJL","created_at":"2026-05-18T12:27:09.501522+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ","json":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ.json","graph_json":"https://pith.science/api/pith-number/INPJQFJLGHXKQKTXE7TGO3FJRQ/graph.json","events_json":"https://pith.science/api/pith-number/INPJQFJLGHXKQKTXE7TGO3FJRQ/events.json","paper":"https://pith.science/paper/INPJQFJL"},"agent_actions":{"view_html":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ","download_json":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ.json","view_paper":"https://pith.science/paper/INPJQFJL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1211.6664&json=true","fetch_graph":"https://pith.science/api/pith-number/INPJQFJLGHXKQKTXE7TGO3FJRQ/graph.json","fetch_events":"https://pith.science/api/pith-number/INPJQFJLGHXKQKTXE7TGO3FJRQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ/action/storage_attestation","attest_author":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ/action/author_attestation","sign_citation":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ/action/citation_signature","submit_replication":"https://pith.science/pith/INPJQFJLGHXKQKTXE7TGO3FJRQ/action/replication_record"}},"created_at":"2026-05-18T02:57:15.046831+00:00","updated_at":"2026-05-18T02:57:15.046831+00:00"}