{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:MMSWSUWTKYPYTKGKPY2EXP272I","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a912dbafa4ed922bfb5418b6a83e3672f2ce566dac99a2a91f034d3863a6fdf1","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29Z","title_canon_sha256":"adb989ba0b0e944dca90f3815d337664e06b8c4a176a9579a8a42bda4422ed0f"},"schema_version":"1.0","source":{"id":"2606.11816","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11816","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11816v1","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11816","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_12","alias_value":"MMSWSUWTKYPY","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_16","alias_value":"MMSWSUWTKYPYTKGK","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_8","alias_value":"MMSWSUWT","created_at":"2026-06-11T01:10:09Z"}],"graph_snapshots":[{"event_id":"sha256:086f661a2aae5e25890c07a6af4445b4d049ac7b23b7daee1f8fa1217b2cb63d","target":"graph","created_at":"2026-06-11T01:10:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.11816/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Forecasting real-world events requires language-model agents to reason under uncertainty from incomplete, time-bounded information. Yet evaluating whether agents genuinely forecast requires more than final-answer accuracy: a model may be correct by recalling memorized training facts, citing fabricated evidence, or producing an unsupported causal story. We present WorldReasoner, an evaluation framework for temporally valid event forecasting. Each task gives an agent a resolved forecasting question, a simulated forecast date, and access only to evidence available before that date; after resoluti","authors_text":"Andreas Vlachos, Eric Chamoun, Yizhou Chi, Zifeng Ding","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29Z","title":"WorldReasoner: Evaluating Whether Language Model Agents Forecast Events with Valid Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11816","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fd4a8acdc036de3b071100dda223b809ea8ba6ce09f405f6935ed9c5a8914eb1","target":"record","created_at":"2026-06-11T01:10:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a912dbafa4ed922bfb5418b6a83e3672f2ce566dac99a2a91f034d3863a6fdf1","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29Z","title_canon_sha256":"adb989ba0b0e944dca90f3815d337664e06b8c4a176a9579a8a42bda4422ed0f"},"schema_version":"1.0","source":{"id":"2606.11816","kind":"arxiv","version":1}},"canonical_sha256":"63256952d3561f89a8ca7e344bbf5fd231d05f6b7093bedd4b42a2c1bb7a1190","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"63256952d3561f89a8ca7e344bbf5fd231d05f6b7093bedd4b42a2c1bb7a1190","first_computed_at":"2026-06-11T01:10:09.618522Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-11T01:10:09.618522Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JGnjhfGvMC+mitjXAKUsOx4OtPpnu+bRTaU8qqLcm0sqE5P7w+02oP32HyH0TFr559voteD5xaEHABPsU/77DQ==","signature_status":"signed_v1","signed_at":"2026-06-11T01:10:09.619359Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.11816","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fd4a8acdc036de3b071100dda223b809ea8ba6ce09f405f6935ed9c5a8914eb1","sha256:086f661a2aae5e25890c07a6af4445b4d049ac7b23b7daee1f8fa1217b2cb63d"],"state_sha256":"e073be1ac9020a16c24c5bfe365cd53c0c61264bfe2d0ea74e974e337e6cb2ec"}