{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:APDPCZ7AZCDJMB3N3CBDYYLBLC","short_pith_number":"pith:APDPCZ7A","schema_version":"1.0","canonical_sha256":"03c6f167e0c88696076dd8823c6161589b76a20e7bd6290fddca0909838c1cdf","source":{"kind":"arxiv","id":"2606.06242","version":1},"attestation_state":"computed","paper":{"title":"Benchmarking Open-Source Layout Detection Models for Data Snapshot Extraction from Institutional Documents","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.IR"],"primary_cat":"cs.CL","authors_text":"Aivin V. Solatorio, AJ Carl P. Dy","submitted_at":"2026-06-04T14:47:40Z","abstract_excerpt":"Institutional documents contain substantial amounts of operational and analytical information embedded within figures and tables. Current approaches for extracting visual content from documents are largely built around generic document layout analysis, where figures and tables are treated as uniformly relevant document objects rather than semantically meaningful analytical artifacts. In this work, we introduce a benchmark dataset and evaluation framework for \\textit{data snapshot extraction}, the task of identifying and localizing semantically meaningful visual artifacts within institutional d"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.06242","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-04T14:47:40Z","cross_cats_sorted":["cs.AI","cs.CV","cs.IR"],"title_canon_sha256":"f523591774417afad25b0d3d4f4da242c6cb3220b7395a45dfedf9a73a3d9cbd","abstract_canon_sha256":"6a4348e0a7948acb52bec379b566d4de812d60c07e8ec7ac033c4f672c4710e9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:39.057304Z","signature_b64":"0nRx4CnWiFXJ1hC+rAIWY6NHPODRWCWRVDHzzvF0JqWfWHfnp2TXTogpNHIRBaVdh11shA1CCH5c06vVSi8dDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"03c6f167e0c88696076dd8823c6161589b76a20e7bd6290fddca0909838c1cdf","last_reissued_at":"2026-06-05T01:15:39.056871Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:39.056871Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Benchmarking Open-Source Layout Detection Models for Data Snapshot Extraction from Institutional Documents","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.IR"],"primary_cat":"cs.CL","authors_text":"Aivin V. Solatorio, AJ Carl P. Dy","submitted_at":"2026-06-04T14:47:40Z","abstract_excerpt":"Institutional documents contain substantial amounts of operational and analytical information embedded within figures and tables. Current approaches for extracting visual content from documents are largely built around generic document layout analysis, where figures and tables are treated as uniformly relevant document objects rather than semantically meaningful analytical artifacts. In this work, we introduce a benchmark dataset and evaluation framework for \\textit{data snapshot extraction}, the task of identifying and localizing semantically meaningful visual artifacts within institutional d"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06242","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.06242/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.06242","created_at":"2026-06-05T01:15:39.056937+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.06242v1","created_at":"2026-06-05T01:15:39.056937+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06242","created_at":"2026-06-05T01:15:39.056937+00:00"},{"alias_kind":"pith_short_12","alias_value":"APDPCZ7AZCDJ","created_at":"2026-06-05T01:15:39.056937+00:00"},{"alias_kind":"pith_short_16","alias_value":"APDPCZ7AZCDJMB3N","created_at":"2026-06-05T01:15:39.056937+00:00"},{"alias_kind":"pith_short_8","alias_value":"APDPCZ7A","created_at":"2026-06-05T01:15:39.056937+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC","json":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC.json","graph_json":"https://pith.science/api/pith-number/APDPCZ7AZCDJMB3N3CBDYYLBLC/graph.json","events_json":"https://pith.science/api/pith-number/APDPCZ7AZCDJMB3N3CBDYYLBLC/events.json","paper":"https://pith.science/paper/APDPCZ7A"},"agent_actions":{"view_html":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC","download_json":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC.json","view_paper":"https://pith.science/paper/APDPCZ7A","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.06242&json=true","fetch_graph":"https://pith.science/api/pith-number/APDPCZ7AZCDJMB3N3CBDYYLBLC/graph.json","fetch_events":"https://pith.science/api/pith-number/APDPCZ7AZCDJMB3N3CBDYYLBLC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC/action/storage_attestation","attest_author":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC/action/author_attestation","sign_citation":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC/action/citation_signature","submit_replication":"https://pith.science/pith/APDPCZ7AZCDJMB3N3CBDYYLBLC/action/replication_record"}},"created_at":"2026-06-05T01:15:39.056937+00:00","updated_at":"2026-06-05T01:15:39.056937+00:00"}