{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:JI3Z6YVCF5RLGNTHADWA23KVNN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6675346fa5220e0cd03deb80eddda43e4b121494c02111057fbd7a0d301262b0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-10-09T09:13:38Z","title_canon_sha256":"23add7f293ce524fd2c9995226ef030c2f5d65ef6c8e054ae1bc8eab1abbf5b2"},"schema_version":"1.0","source":{"id":"2410.06703","kind":"arxiv","version":7}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2410.06703","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"arxiv_version","alias_value":"2410.06703v7","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.06703","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_12","alias_value":"JI3Z6YVCF5RL","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_16","alias_value":"JI3Z6YVCF5RLGNTH","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_8","alias_value":"JI3Z6YVC","created_at":"2026-06-05T01:15:12Z"}],"graph_snapshots":[{"event_id":"sha256:37fec1918a202d199e450079f55b56ba2795ba3c042b737adcc74a871b1761c6","target":"graph","created_at":"2026-06-05T01:15:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2410.06703/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Autonomous web agents solve complex browsing tasks, yet existing benchmarks measure only whether an agent finishes a task, ignoring whether it does so safely or in a way enterprises can trust. To integrate these agents into critical workflows, safety and trustworthiness (ST) are prerequisite conditions for adoption. We introduce \\textbf{\\textsc{ST-WebAgentBench}}, a configurable and easily extensible suite for evaluating web agent ST across realistic enterprise scenarios. Each of its 222 tasks is paired with ST policies, concise rules that encode constraints, and is scored along six orthogonal","authors_text":"Alon Oved, Avi Yaeli, Ben Wiesel, Ido Levy, Nir Mashkif, Sami Marreed, Segev Shlomov","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-10-09T09:13:38Z","title":"ST-WebAgentBench: A Benchmark for Evaluating Safety and Trustworthiness in Web Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2410.06703","kind":"arxiv","version":7},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7841653e6838f64ee9e1f0919590ce341ae8aa6877d907a7942e3315140c230d","target":"record","created_at":"2026-06-05T01:15:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6675346fa5220e0cd03deb80eddda43e4b121494c02111057fbd7a0d301262b0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-10-09T09:13:38Z","title_canon_sha256":"23add7f293ce524fd2c9995226ef030c2f5d65ef6c8e054ae1bc8eab1abbf5b2"},"schema_version":"1.0","source":{"id":"2410.06703","kind":"arxiv","version":7}},"canonical_sha256":"4a379f62a22f62b3366700ec0d6d556b74a11e61e7ee5efdbfa03c45d356fc85","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4a379f62a22f62b3366700ec0d6d556b74a11e61e7ee5efdbfa03c45d356fc85","first_computed_at":"2026-06-05T01:15:12.007244Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-05T01:15:12.007244Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wpP/veI/N3iInhK2JdkPpNfo4idsRLZlTjnsYJNjopVmwJwSvJ4G9VyUpXPjUJTEs8ZLe5SvYbj6lum0repBCg==","signature_status":"signed_v1","signed_at":"2026-06-05T01:15:12.007933Z","signed_message":"canonical_sha256_bytes"},"source_id":"2410.06703","source_kind":"arxiv","source_version":7}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7841653e6838f64ee9e1f0919590ce341ae8aa6877d907a7942e3315140c230d","sha256:37fec1918a202d199e450079f55b56ba2795ba3c042b737adcc74a871b1761c6"],"state_sha256":"5868706bc512e3b36f555bbb245222f48931bea4fc6d90130754266fcf111fad"}