{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:34P566VUN6B46VKVKK2IIRUPF4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9d87aee0ecf5569251ecfc6cf72c6aafc04132bcc89c5209381ad91510d9addb","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-29T11:51:00Z","title_canon_sha256":"31823a2e6f68246c129461939145ca66320dd4bdb0c3d1b7730a633f865dc3cd"},"schema_version":"1.0","source":{"id":"2606.30175","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.30175","created_at":"2026-06-30T02:17:52Z"},{"alias_kind":"arxiv_version","alias_value":"2606.30175v1","created_at":"2026-06-30T02:17:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.30175","created_at":"2026-06-30T02:17:52Z"},{"alias_kind":"pith_short_12","alias_value":"34P566VUN6B4","created_at":"2026-06-30T02:17:52Z"},{"alias_kind":"pith_short_16","alias_value":"34P566VUN6B46VKV","created_at":"2026-06-30T02:17:52Z"},{"alias_kind":"pith_short_8","alias_value":"34P566VU","created_at":"2026-06-30T02:17:52Z"}],"graph_snapshots":[{"event_id":"sha256:6a0eccabc478b93312585dec17aee5378c375c0da08ec4cab64f483561d179ac","target":"graph","created_at":"2026-06-30T02:17:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.30175/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The continuous evolution of large language models drives escalating demands on data scale and quality, and as different training stages impose increasingly tailored data requirements, systematic organization of high-quality corpora becomes indispensable. Existing corpus construction pipelines confine the resulting corpora to flat, undifferentiated document collections, universally lacking systematic knowledge organization. We present Cortex, to our knowledge the first framework that elevates web-scale corpus construction from flat document filtering to structured knowledge organization through","authors_text":"Chengtao Gan, Huajun Chen, Songze Li, Wen Zhang, Xiaoke Guo, Yushan Zhu, Zhaoyan Gong, Zhiqiang Liu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-29T11:51:00Z","title":"CORTEX: High-Quality Cross-Domain Organization of Web-Scale Corpora through Ontological Corpus Graph"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.30175","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1289b85d90477cd9f18bbb9c095d8b889a84cb3d0ee753070ca61a7bb015d62d","target":"record","created_at":"2026-06-30T02:17:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9d87aee0ecf5569251ecfc6cf72c6aafc04132bcc89c5209381ad91510d9addb","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-29T11:51:00Z","title_canon_sha256":"31823a2e6f68246c129461939145ca66320dd4bdb0c3d1b7730a633f865dc3cd"},"schema_version":"1.0","source":{"id":"2606.30175","kind":"arxiv","version":1}},"canonical_sha256":"df1fdf7ab46f83cf555552b484468f2f06a6b2f6db98db29ca7d994fa8c7ff44","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"df1fdf7ab46f83cf555552b484468f2f06a6b2f6db98db29ca7d994fa8c7ff44","first_computed_at":"2026-06-30T02:17:52.608373Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-30T02:17:52.608373Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"G+JWowe77IwKAQIb1ULsJx0PgOB8EpfbNHC9f0NaA7xITOl0BQ8jZu4dZAQjL9bhS00OOzt1XbRBwb4+YI6zAg==","signature_status":"signed_v1","signed_at":"2026-06-30T02:17:52.608833Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.30175","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1289b85d90477cd9f18bbb9c095d8b889a84cb3d0ee753070ca61a7bb015d62d","sha256:6a0eccabc478b93312585dec17aee5378c375c0da08ec4cab64f483561d179ac"],"state_sha256":"a50ed20b07448b1e0d07e5c9b5b86eb52fe52611c600f3ff66d8550e37fc2853"}