{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:IPCJLYTXHABXCBJAVSZU3LTGLN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"72c1e83319f205daaee13ea82fa256d113eaaff6a0bc23fc47e3504527e4dfb4","cross_cats_sorted":["cs.CL","cs.CY","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-06-04T14:46:47Z","title_canon_sha256":"89c507d37bb5e5d3d2a6d1cd9fbde08db83fffc0f3aa5eb22dc416468be6577b"},"schema_version":"1.0","source":{"id":"2506.04018","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2506.04018","created_at":"2026-06-23T03:13:45Z"},{"alias_kind":"arxiv_version","alias_value":"2506.04018v3","created_at":"2026-06-23T03:13:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.04018","created_at":"2026-06-23T03:13:45Z"},{"alias_kind":"pith_short_12","alias_value":"IPCJLYTXHABX","created_at":"2026-06-23T03:13:45Z"},{"alias_kind":"pith_short_16","alias_value":"IPCJLYTXHABXCBJA","created_at":"2026-06-23T03:13:45Z"},{"alias_kind":"pith_short_8","alias_value":"IPCJLYTX","created_at":"2026-06-23T03:13:45Z"}],"graph_snapshots":[{"event_id":"sha256:5804256584d8875b8dba118459f51cc6be1c6ab18e9574d4c17c3a9899e67069","target":"graph","created_at":"2026-06-23T03:13:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2506.04018/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As Large Language Model (LLM) agents become more widespread, associated misalignment risks increase. While prior research has studied agents' ability to produce harmful outputs or follow malicious instructions, it remains unclear how likely agents are to spontaneously pursue unintended goals in realistic deployments. In this work, we approach misalignment as a conflict between the internal goals pursued by the model and the goals intended by its deployer. We introduce a misalignment propensity benchmark, \\textsc{AgentMisalignment}, a benchmark suite designed to evaluate the propensity of LLM a","authors_text":"Akshat Naik, Edward James Young, Emma Goun\\'e, Francisco Javier Campos Zabala, Guillermo Bosch, Jason Ross Brown, Patrick Quinn","cross_cats":["cs.CL","cs.CY","cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-06-04T14:46:47Z","title":"AgentMisalignment: Measuring the Propensity for Misaligned Behaviour in LLM-Based Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.04018","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f154d5b6b9d02cd2b78191f28657ff32ce6d53e1bc755bbd6184c2de8dab4a57","target":"record","created_at":"2026-06-23T03:13:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"72c1e83319f205daaee13ea82fa256d113eaaff6a0bc23fc47e3504527e4dfb4","cross_cats_sorted":["cs.CL","cs.CY","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-06-04T14:46:47Z","title_canon_sha256":"89c507d37bb5e5d3d2a6d1cd9fbde08db83fffc0f3aa5eb22dc416468be6577b"},"schema_version":"1.0","source":{"id":"2506.04018","kind":"arxiv","version":3}},"canonical_sha256":"43c495e2773803710520acb34dae665b5d771af3ccfab5b7bf0dbe8f152e98e8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"43c495e2773803710520acb34dae665b5d771af3ccfab5b7bf0dbe8f152e98e8","first_computed_at":"2026-06-23T03:13:45.788787Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T03:13:45.788787Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"peqYH4+KB0Gfr3DQQIy+HB4p+AS8ZcBN2V7D6VUq3+cDCuHgHXdGps0jTLsp3ri5cs6kovDjPCShp2ay0gWRCA==","signature_status":"signed_v1","signed_at":"2026-06-23T03:13:45.789443Z","signed_message":"canonical_sha256_bytes"},"source_id":"2506.04018","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f154d5b6b9d02cd2b78191f28657ff32ce6d53e1bc755bbd6184c2de8dab4a57","sha256:5804256584d8875b8dba118459f51cc6be1c6ab18e9574d4c17c3a9899e67069"],"state_sha256":"90475adb7bbdc447a2ae86b7502771d1c4f9c236b8de9dd49cc17c31488a4ad5"}