{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:N6RK7NHVJQFUPPBD5UH4JPUXGC","short_pith_number":"pith:N6RK7NHV","canonical_record":{"source":{"id":"2501.06659","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DB","submitted_at":"2025-01-11T23:07:04Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"69979ec0b0c135e5ff539259a0e0b7fbb2d4a0ba23db71a388518571b4b455c7","abstract_canon_sha256":"5904112b23a1ed31ff4e1f0f4507073fe2e9deb1ee5fcc0a57c19e2a0700a210"},"schema_version":"1.0"},"canonical_sha256":"6fa2afb4f54c0b47bc23ed0fc4be9730809cf2a434a5e49fae721a5a926d01eb","source":{"kind":"arxiv","id":"2501.06659","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.06659","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"arxiv_version","alias_value":"2501.06659v2","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.06659","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_12","alias_value":"N6RK7NHVJQFU","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_16","alias_value":"N6RK7NHVJQFUPPBD","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_8","alias_value":"N6RK7NHV","created_at":"2026-06-09T02:07:01Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:N6RK7NHVJQFUPPBD5UH4JPUXGC","target":"record","payload":{"canonical_record":{"source":{"id":"2501.06659","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DB","submitted_at":"2025-01-11T23:07:04Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"69979ec0b0c135e5ff539259a0e0b7fbb2d4a0ba23db71a388518571b4b455c7","abstract_canon_sha256":"5904112b23a1ed31ff4e1f0f4507073fe2e9deb1ee5fcc0a57c19e2a0700a210"},"schema_version":"1.0"},"canonical_sha256":"6fa2afb4f54c0b47bc23ed0fc4be9730809cf2a434a5e49fae721a5a926d01eb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:01.710502Z","signature_b64":"VRKo6tmzzsl8ZdRstU8ucu5n1wEdNSTq6Y9L/iJMFTQMgx7yU574CfY0owM1SAWY2TANUINXnoqJi8agCacYCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6fa2afb4f54c0b47bc23ed0fc4be9730809cf2a434a5e49fae721a5a926d01eb","last_reissued_at":"2026-06-09T02:07:01.709457Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:01.709457Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2501.06659","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"o/BTWyzOj8HRy+IwOY9SwxNoPMbZjeDHuCpkupF7ESs3tdiFrxV4wNW2EzhOLjvCbDA9FZ8Mot50Kq6P4lDkAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T13:12:41.189925Z"},"content_sha256":"6ae4bec224282751b59f716e4984d3d02c07ce8ab78096530a7f5a5a8ab6ab15","schema_version":"1.0","event_id":"sha256:6ae4bec224282751b59f716e4984d3d02c07ce8ab78096530a7f5a5a8ab6ab15"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:N6RK7NHVJQFUPPBD5UH4JPUXGC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Visual Template Inference for Data Extraction from Documents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.DB","authors_text":"Aditya G. Parameswaran, Alvin Cheung, Mawil Hasan, Rohan Kosalge, Yiming Lin","submitted_at":"2025-01-11T23:07:04Z","abstract_excerpt":"Many templatized documents are programmatically generated from structured data following a visual template. Such documents include invoices, tax documents, financial reports, and purchase orders. Effective data extraction from these documents is crucial to support downstream analytical tasks. Current data extraction tools often struggle with complex document layouts, incur high latency and/or cost on large datasets, and require significant human effort. The key insight of our tool, TWIX, is to infer the underlying template used to create such documents, and then extract the data, rather than e"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2501.06659","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2501.06659/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+oeoXrbcFTazKNKHQ+qFfurci6yWVrGwuswK40RIR8LBODJGlzYJ39TVbiMnMBjqzRzrz/uJ8jvrgSG+t0UrCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T13:12:41.191177Z"},"content_sha256":"de5ca273271d5817db35f0d973c42bfa79f19ab88ba6fb49b00dc34e428d49fe","schema_version":"1.0","event_id":"sha256:de5ca273271d5817db35f0d973c42bfa79f19ab88ba6fb49b00dc34e428d49fe"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/N6RK7NHVJQFUPPBD5UH4JPUXGC/bundle.json","state_url":"https://pith.science/pith/N6RK7NHVJQFUPPBD5UH4JPUXGC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/N6RK7NHVJQFUPPBD5UH4JPUXGC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T13:12:41Z","links":{"resolver":"https://pith.science/pith/N6RK7NHVJQFUPPBD5UH4JPUXGC","bundle":"https://pith.science/pith/N6RK7NHVJQFUPPBD5UH4JPUXGC/bundle.json","state":"https://pith.science/pith/N6RK7NHVJQFUPPBD5UH4JPUXGC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/N6RK7NHVJQFUPPBD5UH4JPUXGC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:N6RK7NHVJQFUPPBD5UH4JPUXGC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5904112b23a1ed31ff4e1f0f4507073fe2e9deb1ee5fcc0a57c19e2a0700a210","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DB","submitted_at":"2025-01-11T23:07:04Z","title_canon_sha256":"69979ec0b0c135e5ff539259a0e0b7fbb2d4a0ba23db71a388518571b4b455c7"},"schema_version":"1.0","source":{"id":"2501.06659","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.06659","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"arxiv_version","alias_value":"2501.06659v2","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.06659","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_12","alias_value":"N6RK7NHVJQFU","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_16","alias_value":"N6RK7NHVJQFUPPBD","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_8","alias_value":"N6RK7NHV","created_at":"2026-06-09T02:07:01Z"}],"graph_snapshots":[{"event_id":"sha256:de5ca273271d5817db35f0d973c42bfa79f19ab88ba6fb49b00dc34e428d49fe","target":"graph","created_at":"2026-06-09T02:07:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2501.06659/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Many templatized documents are programmatically generated from structured data following a visual template. Such documents include invoices, tax documents, financial reports, and purchase orders. Effective data extraction from these documents is crucial to support downstream analytical tasks. Current data extraction tools often struggle with complex document layouts, incur high latency and/or cost on large datasets, and require significant human effort. The key insight of our tool, TWIX, is to infer the underlying template used to create such documents, and then extract the data, rather than e","authors_text":"Aditya G. Parameswaran, Alvin Cheung, Mawil Hasan, Rohan Kosalge, Yiming Lin","cross_cats":["cs.CV"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DB","submitted_at":"2025-01-11T23:07:04Z","title":"Visual Template Inference for Data Extraction from Documents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2501.06659","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6ae4bec224282751b59f716e4984d3d02c07ce8ab78096530a7f5a5a8ab6ab15","target":"record","created_at":"2026-06-09T02:07:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5904112b23a1ed31ff4e1f0f4507073fe2e9deb1ee5fcc0a57c19e2a0700a210","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DB","submitted_at":"2025-01-11T23:07:04Z","title_canon_sha256":"69979ec0b0c135e5ff539259a0e0b7fbb2d4a0ba23db71a388518571b4b455c7"},"schema_version":"1.0","source":{"id":"2501.06659","kind":"arxiv","version":2}},"canonical_sha256":"6fa2afb4f54c0b47bc23ed0fc4be9730809cf2a434a5e49fae721a5a926d01eb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6fa2afb4f54c0b47bc23ed0fc4be9730809cf2a434a5e49fae721a5a926d01eb","first_computed_at":"2026-06-09T02:07:01.709457Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T02:07:01.709457Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VRKo6tmzzsl8ZdRstU8ucu5n1wEdNSTq6Y9L/iJMFTQMgx7yU574CfY0owM1SAWY2TANUINXnoqJi8agCacYCQ==","signature_status":"signed_v1","signed_at":"2026-06-09T02:07:01.710502Z","signed_message":"canonical_sha256_bytes"},"source_id":"2501.06659","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6ae4bec224282751b59f716e4984d3d02c07ce8ab78096530a7f5a5a8ab6ab15","sha256:de5ca273271d5817db35f0d973c42bfa79f19ab88ba6fb49b00dc34e428d49fe"],"state_sha256":"a831cf16922df115ac1ec289e24c1326784ee12c2f335426d8aedafdd7d19ffd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oaX/N+bS0sMM6Vu4lO5OngB1piIf0fbwsobfpDkhIp6mPlAvbGsaR4bLbyAEw5Hjbu1QjP9tFPC0l4831ORvCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T13:12:41.195156Z","bundle_sha256":"155d3454e2097d734fbb8c086a51ea6c729ea3218c9af483936b20c3452a5319"}}