{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YSCSTBRTR5GAQX6ICV73QGEZYL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2af2a39dc278b170cdb7fd2c5e38b8779ee83861a41403aadfd2bd3045c99633","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-02T08:09:10Z","title_canon_sha256":"916705494a652147fdea2d475925bc0fa590135b7ce57bb5ca18b0862ae5e751"},"schema_version":"1.0","source":{"id":"2607.00007","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2607.00007","created_at":"2026-07-02T00:18:04Z"},{"alias_kind":"arxiv_version","alias_value":"2607.00007v1","created_at":"2026-07-02T00:18:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.00007","created_at":"2026-07-02T00:18:04Z"},{"alias_kind":"pith_short_12","alias_value":"YSCSTBRTR5GA","created_at":"2026-07-02T00:18:04Z"},{"alias_kind":"pith_short_16","alias_value":"YSCSTBRTR5GAQX6I","created_at":"2026-07-02T00:18:04Z"},{"alias_kind":"pith_short_8","alias_value":"YSCSTBRT","created_at":"2026-07-02T00:18:04Z"}],"graph_snapshots":[{"event_id":"sha256:270bb32576331f38a0f1d469adfca65c19340f5be32fc6a6d4f4e5fb7e3c004e","target":"graph","created_at":"2026-07-02T00:18:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2607.00007/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language model (LLM)-based web agents reduce manual scripting for web data collection, yet on live websites, they often miss relevant pages, return incomplete multimodal outputs, or return media URLs that are not directly downloadable. We present BFS-and-Reflection Agent (BaRA), a framework for site-level collection under a fixed interaction budget. The framework combines bounded breadth-first search (BFS) traversal with history-based self-reflection. We evaluate BaRA on 50 synthetic websites with ground-truth reference sets. We additionally test on three public websites with cluttered o","authors_text":"Joseph Lee, Kyungwoo Song, Soojeong Lee, Sunjae Kim, Yongseong Cho, Youngwoo Moon","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-02T08:09:10Z","title":"BaRA: BFS-and-Reflection Web Data Collection Agent"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.00007","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d9b5e7abd7643d218c5b5e5ce0ea7e0dc6d7b518e361a693049144c6fb36088d","target":"record","created_at":"2026-07-02T00:18:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2af2a39dc278b170cdb7fd2c5e38b8779ee83861a41403aadfd2bd3045c99633","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-02T08:09:10Z","title_canon_sha256":"916705494a652147fdea2d475925bc0fa590135b7ce57bb5ca18b0862ae5e751"},"schema_version":"1.0","source":{"id":"2607.00007","kind":"arxiv","version":1}},"canonical_sha256":"c4852986338f4c085fc8157fb81899c2cb2a57aa08531669e07396dd01cef3aa","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c4852986338f4c085fc8157fb81899c2cb2a57aa08531669e07396dd01cef3aa","first_computed_at":"2026-07-02T00:18:04.294299Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-02T00:18:04.294299Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"glG8h9ovkX0sL1/7QHq/O3NLkq/PSS7HxXq+7OlTwjmxHqGtt6Qro9rtHFx6e+sRPVlt8r+6VYg11MsJEznFAQ==","signature_status":"signed_v1","signed_at":"2026-07-02T00:18:04.294875Z","signed_message":"canonical_sha256_bytes"},"source_id":"2607.00007","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d9b5e7abd7643d218c5b5e5ce0ea7e0dc6d7b518e361a693049144c6fb36088d","sha256:270bb32576331f38a0f1d469adfca65c19340f5be32fc6a6d4f4e5fb7e3c004e"],"state_sha256":"f20a4c2ef6bb1353968d618cd1eb87e2754b459bb8310cd8baaebd0b46652089"}