{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:LYASPHITZK2CVPQSYTYP4GW36T","short_pith_number":"pith:LYASPHIT","schema_version":"1.0","canonical_sha256":"5e01279d13cab42abe12c4f0fe1adbf4cc6d05cfdd0eb39fdfeab99978a09100","source":{"kind":"arxiv","id":"2606.08531","version":1},"attestation_state":"computed","paper":{"title":"VESTA: A Fully Automated Scenario Generation and Safety Evaluation Framework for LLM Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Dongqi Liang, Feifei Zhao, Haibo Tong, Jindong Li, Lu Jia, Ping Wu, Qian Zhang, Yi Zeng","submitted_at":"2026-06-07T09:23:38Z","abstract_excerpt":"Large language models (LLMs) are increasingly evolving from simple text-based interaction systems into LLM agents that can maintain memory, use tools, access external environments, and execute tasks. As their capabilities and autonomy expand, the safety risks they face also become more diverse. Existing evaluations often rely on manually written scenarios, static prompts, or final-output judgments, making it difficult to capture the diverse risks that agents may face during task execution. We introduce VESTA, a fully automated scenario generation and safety evaluation framework for LLM agents."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.08531","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T09:23:38Z","cross_cats_sorted":[],"title_canon_sha256":"aa77314047b799438a065a9daf2cb6d979965a69f68fc99d6a8983cb638148f1","abstract_canon_sha256":"fbe114b1c5b0bea82f7e80d6deed6dbe36426dbb438b4f775f4a961a52588182"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:05:39.248301Z","signature_b64":"Zol56xsviGQaNh7sQ/6z98LyNL9XTp5A3MdvVn5NqZShzJUoFvfQAGSdbNtBd72vQhe3E2AzdTWVchzybsp1DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5e01279d13cab42abe12c4f0fe1adbf4cc6d05cfdd0eb39fdfeab99978a09100","last_reissued_at":"2026-06-09T01:05:39.247863Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:05:39.247863Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"VESTA: A Fully Automated Scenario Generation and Safety Evaluation Framework for LLM Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Dongqi Liang, Feifei Zhao, Haibo Tong, Jindong Li, Lu Jia, Ping Wu, Qian Zhang, Yi Zeng","submitted_at":"2026-06-07T09:23:38Z","abstract_excerpt":"Large language models (LLMs) are increasingly evolving from simple text-based interaction systems into LLM agents that can maintain memory, use tools, access external environments, and execute tasks. As their capabilities and autonomy expand, the safety risks they face also become more diverse. Existing evaluations often rely on manually written scenarios, static prompts, or final-output judgments, making it difficult to capture the diverse risks that agents may face during task execution. We introduce VESTA, a fully automated scenario generation and safety evaluation framework for LLM agents."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08531","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.08531/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.08531","created_at":"2026-06-09T01:05:39.247925+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.08531v1","created_at":"2026-06-09T01:05:39.247925+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08531","created_at":"2026-06-09T01:05:39.247925+00:00"},{"alias_kind":"pith_short_12","alias_value":"LYASPHITZK2C","created_at":"2026-06-09T01:05:39.247925+00:00"},{"alias_kind":"pith_short_16","alias_value":"LYASPHITZK2CVPQS","created_at":"2026-06-09T01:05:39.247925+00:00"},{"alias_kind":"pith_short_8","alias_value":"LYASPHIT","created_at":"2026-06-09T01:05:39.247925+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T","json":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T.json","graph_json":"https://pith.science/api/pith-number/LYASPHITZK2CVPQSYTYP4GW36T/graph.json","events_json":"https://pith.science/api/pith-number/LYASPHITZK2CVPQSYTYP4GW36T/events.json","paper":"https://pith.science/paper/LYASPHIT"},"agent_actions":{"view_html":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T","download_json":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T.json","view_paper":"https://pith.science/paper/LYASPHIT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.08531&json=true","fetch_graph":"https://pith.science/api/pith-number/LYASPHITZK2CVPQSYTYP4GW36T/graph.json","fetch_events":"https://pith.science/api/pith-number/LYASPHITZK2CVPQSYTYP4GW36T/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T/action/storage_attestation","attest_author":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T/action/author_attestation","sign_citation":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T/action/citation_signature","submit_replication":"https://pith.science/pith/LYASPHITZK2CVPQSYTYP4GW36T/action/replication_record"}},"created_at":"2026-06-09T01:05:39.247925+00:00","updated_at":"2026-06-09T01:05:39.247925+00:00"}