{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:WO7D6S5W62NRVVL3ZXHIXAE7FC","short_pith_number":"pith:WO7D6S5W","schema_version":"1.0","canonical_sha256":"b3be3f4bb6f69b1ad57bcdce8b809f28aa62d2fe179d89b12c00cafddc89e844","source":{"kind":"arxiv","id":"2602.08235","version":2},"attestation_state":"computed","paper":{"title":"When Benign Inputs Lead to Severe Harms: Eliciting Unsafe Unintended Behaviors of Computer-Use Agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CR"],"primary_cat":"cs.CL","authors_text":"Dawn Song, Eric Fosler-Lussier, Huan Sun, Jaylen Jones, Pierre-Luc St-Charles, Yoshua Bengio, Yu Su, Yuting Ning, Zhehao Zhang","submitted_at":"2026-02-09T03:20:11Z","abstract_excerpt":"Although computer-use agents (CUAs) hold significant potential to automate increasingly complex OS workflows, they can demonstrate unsafe unintended behaviors that deviate from expected outcomes even under benign input contexts. However, exploration of this risk remains largely anecdotal, lacking concrete characterization and automated methods to proactively surface long-tail unintended behaviors under realistic CUA scenarios. To fill this gap, we introduce the first conceptual and methodological framework for unintended CUA behaviors, by defining their key characteristics, automatically elici"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.08235","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-02-09T03:20:11Z","cross_cats_sorted":["cs.AI","cs.CR"],"title_canon_sha256":"efc6e670330bfd2a026d7604774e362df1fde66ba04655c9b5eff5aac6674e07","abstract_canon_sha256":"724a5a837ffcde83a7abb52fcd773043ee13431eaf59ba79a49a7550be57d77a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:05:14.051104Z","signature_b64":"YaGPHAwU/sOm1ww3yBDIaPVt5fhndDaISQoCgNgLgyfKgZtfsYuhCKm9amogkrbzB38y4SGAAdLMhTkxUZULDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b3be3f4bb6f69b1ad57bcdce8b809f28aa62d2fe179d89b12c00cafddc89e844","last_reissued_at":"2026-06-09T01:05:14.050591Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:05:14.050591Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"When Benign Inputs Lead to Severe Harms: Eliciting Unsafe Unintended Behaviors of Computer-Use Agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CR"],"primary_cat":"cs.CL","authors_text":"Dawn Song, Eric Fosler-Lussier, Huan Sun, Jaylen Jones, Pierre-Luc St-Charles, Yoshua Bengio, Yu Su, Yuting Ning, Zhehao Zhang","submitted_at":"2026-02-09T03:20:11Z","abstract_excerpt":"Although computer-use agents (CUAs) hold significant potential to automate increasingly complex OS workflows, they can demonstrate unsafe unintended behaviors that deviate from expected outcomes even under benign input contexts. However, exploration of this risk remains largely anecdotal, lacking concrete characterization and automated methods to proactively surface long-tail unintended behaviors under realistic CUA scenarios. To fill this gap, we introduce the first conceptual and methodological framework for unintended CUA behaviors, by defining their key characteristics, automatically elici"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.08235","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.08235/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.08235","created_at":"2026-06-09T01:05:14.050657+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.08235v2","created_at":"2026-06-09T01:05:14.050657+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.08235","created_at":"2026-06-09T01:05:14.050657+00:00"},{"alias_kind":"pith_short_12","alias_value":"WO7D6S5W62NR","created_at":"2026-06-09T01:05:14.050657+00:00"},{"alias_kind":"pith_short_16","alias_value":"WO7D6S5W62NRVVL3","created_at":"2026-06-09T01:05:14.050657+00:00"},{"alias_kind":"pith_short_8","alias_value":"WO7D6S5W","created_at":"2026-06-09T01:05:14.050657+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2605.11003","citing_title":"The Authorization-Execution Gap Is a Major Safety and Security Problem in Open-World Agents","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2604.23772","citing_title":"PageGuide: Browser extension to assist users in navigating a webpage and locating information","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06731","citing_title":"When Routine Chats Turn Toxic: Unintended Long-Term State Poisoning in Personalized Agents","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07110","citing_title":"Securing Computer-Use Agents: A Unified Architecture-Lifecycle Framework for Deployment-Grounded Reliability","ref_index":176,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC","json":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC.json","graph_json":"https://pith.science/api/pith-number/WO7D6S5W62NRVVL3ZXHIXAE7FC/graph.json","events_json":"https://pith.science/api/pith-number/WO7D6S5W62NRVVL3ZXHIXAE7FC/events.json","paper":"https://pith.science/paper/WO7D6S5W"},"agent_actions":{"view_html":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC","download_json":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC.json","view_paper":"https://pith.science/paper/WO7D6S5W","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.08235&json=true","fetch_graph":"https://pith.science/api/pith-number/WO7D6S5W62NRVVL3ZXHIXAE7FC/graph.json","fetch_events":"https://pith.science/api/pith-number/WO7D6S5W62NRVVL3ZXHIXAE7FC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC/action/storage_attestation","attest_author":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC/action/author_attestation","sign_citation":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC/action/citation_signature","submit_replication":"https://pith.science/pith/WO7D6S5W62NRVVL3ZXHIXAE7FC/action/replication_record"}},"created_at":"2026-06-09T01:05:14.050657+00:00","updated_at":"2026-06-09T01:05:14.050657+00:00"}