{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:CPOH7AWZRKIHCSNYBEWYFHYMIN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0e51281c1347977423095d3d4f8f39790e3e6393100840a7c4aca56852e29830","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-06-25T05:18:20Z","title_canon_sha256":"7f952fb8daa8b88439815c8a21ddbc6295bae90bcedbfff514db399ccdd77807"},"schema_version":"1.0","source":{"id":"2606.26613","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.26613","created_at":"2026-06-26T01:15:36Z"},{"alias_kind":"arxiv_version","alias_value":"2606.26613v1","created_at":"2026-06-26T01:15:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26613","created_at":"2026-06-26T01:15:36Z"},{"alias_kind":"pith_short_12","alias_value":"CPOH7AWZRKIH","created_at":"2026-06-26T01:15:36Z"},{"alias_kind":"pith_short_16","alias_value":"CPOH7AWZRKIHCSNY","created_at":"2026-06-26T01:15:36Z"},{"alias_kind":"pith_short_8","alias_value":"CPOH7AWZ","created_at":"2026-06-26T01:15:36Z"}],"graph_snapshots":[{"event_id":"sha256:425f36e28fd62f6d1308880c296ffac1323c73d3ffe450b3139e1d2f93bced77","target":"graph","created_at":"2026-06-26T01:15:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.26613/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The diverse formats of CSV and Parquet files in data lakes pose a significant challenge to traditional ETL, which relies on data engineers to pre-define a target database schema and build a complex pipeline for data integration. Moreover, with this approach, the integrated data often cannot support various analytical needs, as the predefined schema does not necessarily satisfy the table format or join relationships required to answer unforeseen queries. To address this, we propose EcoTable, the first natural language-based data integration framework. Given a set of user-specified natural langu","authors_text":"(2) Kuaishou Technology, (3) University of Arizona), Chengliang Chai (1), Fengjin Wang (2), Guoren Wang (1), Hangyu Zhao (1), Jinqi Liu (1), Lei Cao (3) ((1) Beijing Institute of Technology, Xin Tang (1), Ye Yuan (1), Yuhao Deng (1), Yuhui Wang (1), Yuyu Luo (1)","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-06-25T05:18:20Z","title":"EcoTable: Cost-effective Table Integration in Data Lakes for Natural Language Queries"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26613","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f5eaa249824a31daf595120772e451f962d3b935a1e0a9984eb0bc588ca1b292","target":"record","created_at":"2026-06-26T01:15:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0e51281c1347977423095d3d4f8f39790e3e6393100840a7c4aca56852e29830","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-06-25T05:18:20Z","title_canon_sha256":"7f952fb8daa8b88439815c8a21ddbc6295bae90bcedbfff514db399ccdd77807"},"schema_version":"1.0","source":{"id":"2606.26613","kind":"arxiv","version":1}},"canonical_sha256":"13dc7f82d98a907149b8092d829f0c43678ee41760911761b7820b3b7c6dc91d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"13dc7f82d98a907149b8092d829f0c43678ee41760911761b7820b3b7c6dc91d","first_computed_at":"2026-06-26T01:15:36.245268Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-26T01:15:36.245268Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"G/WZj6g5oOtETbude/JyXqscDCYeXzFH+ttP6M2sGtC9JvIBLZ0ynNoN2Q/joxKwlutDUUAvXUU+meAQ+89XDQ==","signature_status":"signed_v1","signed_at":"2026-06-26T01:15:36.245652Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.26613","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f5eaa249824a31daf595120772e451f962d3b935a1e0a9984eb0bc588ca1b292","sha256:425f36e28fd62f6d1308880c296ffac1323c73d3ffe450b3139e1d2f93bced77"],"state_sha256":"4d7eaae47d9a6d365eb7aa0c0b8eef5b4c30da99a6865492df9739d0208a9890"}