{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:JI3Z6YVCF5RLGNTHADWA23KVNN","short_pith_number":"pith:JI3Z6YVC","canonical_record":{"source":{"id":"2410.06703","kind":"arxiv","version":7},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-10-09T09:13:38Z","cross_cats_sorted":[],"title_canon_sha256":"23add7f293ce524fd2c9995226ef030c2f5d65ef6c8e054ae1bc8eab1abbf5b2","abstract_canon_sha256":"6675346fa5220e0cd03deb80eddda43e4b121494c02111057fbd7a0d301262b0"},"schema_version":"1.0"},"canonical_sha256":"4a379f62a22f62b3366700ec0d6d556b74a11e61e7ee5efdbfa03c45d356fc85","source":{"kind":"arxiv","id":"2410.06703","version":7},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2410.06703","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"arxiv_version","alias_value":"2410.06703v7","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.06703","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_12","alias_value":"JI3Z6YVCF5RL","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_16","alias_value":"JI3Z6YVCF5RLGNTH","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_8","alias_value":"JI3Z6YVC","created_at":"2026-06-05T01:15:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:JI3Z6YVCF5RLGNTHADWA23KVNN","target":"record","payload":{"canonical_record":{"source":{"id":"2410.06703","kind":"arxiv","version":7},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-10-09T09:13:38Z","cross_cats_sorted":[],"title_canon_sha256":"23add7f293ce524fd2c9995226ef030c2f5d65ef6c8e054ae1bc8eab1abbf5b2","abstract_canon_sha256":"6675346fa5220e0cd03deb80eddda43e4b121494c02111057fbd7a0d301262b0"},"schema_version":"1.0"},"canonical_sha256":"4a379f62a22f62b3366700ec0d6d556b74a11e61e7ee5efdbfa03c45d356fc85","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:12.007933Z","signature_b64":"wpP/veI/N3iInhK2JdkPpNfo4idsRLZlTjnsYJNjopVmwJwSvJ4G9VyUpXPjUJTEs8ZLe5SvYbj6lum0repBCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4a379f62a22f62b3366700ec0d6d556b74a11e61e7ee5efdbfa03c45d356fc85","last_reissued_at":"2026-06-05T01:15:12.007244Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:12.007244Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2410.06703","source_version":7,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:15:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8VcGwulg5YzK2CUYyTcmL5Ge2eHYCiLrRXVSV1602Sp9s1vXwO2WEEl1nP/XM6WcVqZK+GW4uSur57ejNYi5Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T16:46:39.464185Z"},"content_sha256":"7841653e6838f64ee9e1f0919590ce341ae8aa6877d907a7942e3315140c230d","schema_version":"1.0","event_id":"sha256:7841653e6838f64ee9e1f0919590ce341ae8aa6877d907a7942e3315140c230d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:JI3Z6YVCF5RLGNTHADWA23KVNN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ST-WebAgentBench: A Benchmark for Evaluating Safety and Trustworthiness in Web Agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Alon Oved, Avi Yaeli, Ben Wiesel, Ido Levy, Nir Mashkif, Sami Marreed, Segev Shlomov","submitted_at":"2024-10-09T09:13:38Z","abstract_excerpt":"Autonomous web agents solve complex browsing tasks, yet existing benchmarks measure only whether an agent finishes a task, ignoring whether it does so safely or in a way enterprises can trust. To integrate these agents into critical workflows, safety and trustworthiness (ST) are prerequisite conditions for adoption. We introduce \\textbf{\\textsc{ST-WebAgentBench}}, a configurable and easily extensible suite for evaluating web agent ST across realistic enterprise scenarios. Each of its 222 tasks is paired with ST policies, concise rules that encode constraints, and is scored along six orthogonal"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2410.06703","kind":"arxiv","version":7},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2410.06703/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:15:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"utfXepDeWgPKbC+hfYBWDiRXdB7ZOx6RR2DMjvI1tovYnapxnqR+iTW6w37JCr0poDLrjkMec83k4RkmYDPuBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T16:46:39.464604Z"},"content_sha256":"37fec1918a202d199e450079f55b56ba2795ba3c042b737adcc74a871b1761c6","schema_version":"1.0","event_id":"sha256:37fec1918a202d199e450079f55b56ba2795ba3c042b737adcc74a871b1761c6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/JI3Z6YVCF5RLGNTHADWA23KVNN/bundle.json","state_url":"https://pith.science/pith/JI3Z6YVCF5RLGNTHADWA23KVNN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/JI3Z6YVCF5RLGNTHADWA23KVNN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T16:46:39Z","links":{"resolver":"https://pith.science/pith/JI3Z6YVCF5RLGNTHADWA23KVNN","bundle":"https://pith.science/pith/JI3Z6YVCF5RLGNTHADWA23KVNN/bundle.json","state":"https://pith.science/pith/JI3Z6YVCF5RLGNTHADWA23KVNN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/JI3Z6YVCF5RLGNTHADWA23KVNN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:JI3Z6YVCF5RLGNTHADWA23KVNN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6675346fa5220e0cd03deb80eddda43e4b121494c02111057fbd7a0d301262b0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-10-09T09:13:38Z","title_canon_sha256":"23add7f293ce524fd2c9995226ef030c2f5d65ef6c8e054ae1bc8eab1abbf5b2"},"schema_version":"1.0","source":{"id":"2410.06703","kind":"arxiv","version":7}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2410.06703","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"arxiv_version","alias_value":"2410.06703v7","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.06703","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_12","alias_value":"JI3Z6YVCF5RL","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_16","alias_value":"JI3Z6YVCF5RLGNTH","created_at":"2026-06-05T01:15:12Z"},{"alias_kind":"pith_short_8","alias_value":"JI3Z6YVC","created_at":"2026-06-05T01:15:12Z"}],"graph_snapshots":[{"event_id":"sha256:37fec1918a202d199e450079f55b56ba2795ba3c042b737adcc74a871b1761c6","target":"graph","created_at":"2026-06-05T01:15:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2410.06703/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Autonomous web agents solve complex browsing tasks, yet existing benchmarks measure only whether an agent finishes a task, ignoring whether it does so safely or in a way enterprises can trust. To integrate these agents into critical workflows, safety and trustworthiness (ST) are prerequisite conditions for adoption. We introduce \\textbf{\\textsc{ST-WebAgentBench}}, a configurable and easily extensible suite for evaluating web agent ST across realistic enterprise scenarios. Each of its 222 tasks is paired with ST policies, concise rules that encode constraints, and is scored along six orthogonal","authors_text":"Alon Oved, Avi Yaeli, Ben Wiesel, Ido Levy, Nir Mashkif, Sami Marreed, Segev Shlomov","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-10-09T09:13:38Z","title":"ST-WebAgentBench: A Benchmark for Evaluating Safety and Trustworthiness in Web Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2410.06703","kind":"arxiv","version":7},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7841653e6838f64ee9e1f0919590ce341ae8aa6877d907a7942e3315140c230d","target":"record","created_at":"2026-06-05T01:15:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6675346fa5220e0cd03deb80eddda43e4b121494c02111057fbd7a0d301262b0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2024-10-09T09:13:38Z","title_canon_sha256":"23add7f293ce524fd2c9995226ef030c2f5d65ef6c8e054ae1bc8eab1abbf5b2"},"schema_version":"1.0","source":{"id":"2410.06703","kind":"arxiv","version":7}},"canonical_sha256":"4a379f62a22f62b3366700ec0d6d556b74a11e61e7ee5efdbfa03c45d356fc85","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4a379f62a22f62b3366700ec0d6d556b74a11e61e7ee5efdbfa03c45d356fc85","first_computed_at":"2026-06-05T01:15:12.007244Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-05T01:15:12.007244Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wpP/veI/N3iInhK2JdkPpNfo4idsRLZlTjnsYJNjopVmwJwSvJ4G9VyUpXPjUJTEs8ZLe5SvYbj6lum0repBCg==","signature_status":"signed_v1","signed_at":"2026-06-05T01:15:12.007933Z","signed_message":"canonical_sha256_bytes"},"source_id":"2410.06703","source_kind":"arxiv","source_version":7}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7841653e6838f64ee9e1f0919590ce341ae8aa6877d907a7942e3315140c230d","sha256:37fec1918a202d199e450079f55b56ba2795ba3c042b737adcc74a871b1761c6"],"state_sha256":"5868706bc512e3b36f555bbb245222f48931bea4fc6d90130754266fcf111fad"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WtQsZe3GoGu6i0SVK1i7/+cqFWACI/UMss72R+UgNVATO3VuAkp2A/Zid1rWHHcB30uTjFvUOALFYU4VD8QgDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T16:46:39.467041Z","bundle_sha256":"ae82a6bc827122b399f3d7cb91cd78c4f148e0cc7ee9f1a941c10d4e8a563b15"}}