{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:OELEPP4IGTFEDTH3YY67TUBHDG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a92a818082904ecc9cfd52121ac018927b37a5e0da072f19dbac9795bb191d17","cross_cats_sorted":["cs.AI","cs.SE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-23T19:40:51Z","title_canon_sha256":"5f767b424c1af0fdea74e67824bb6478c1ed80b804f4a7841fcad123464b4aec"},"schema_version":"1.0","source":{"id":"2512.20732","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.20732","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"arxiv_version","alias_value":"2512.20732v2","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.20732","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_12","alias_value":"OELEPP4IGTFE","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_16","alias_value":"OELEPP4IGTFEDTH3","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_8","alias_value":"OELEPP4I","created_at":"2026-06-01T01:03:47Z"}],"graph_snapshots":[{"event_id":"sha256:ad737f1c2cbe0c1c57be71635dfadc217d77b5d2fa896ab636da1b772219d932","target":"graph","created_at":"2026-06-01T01:03:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.20732/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As LLMs advance their reasoning capabilities about the physical world, the absence of rigorous benchmarks for evaluating their ability to generate scientifically valid physical models has become a critical gap. Computational mechanics, which develops and applies mathematical models and numerical methods to predict the behavior of physical systems under forces, deformation, and constraints, provides an ideal foundation for structured scientific reasoning evaluation. Problems follow clear mathematical structure, enforce strict physical and numerical constraints, and support objective verificatio","authors_text":"Emma Lejeune, Erfan Hamdi, Joel Shor, Saeed Mohammadzadeh","cross_cats":["cs.AI","cs.SE"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-23T19:40:51Z","title":"FEM-Bench: A Structured Scientific Reasoning Benchmark for Evaluating Code-Generating LLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.20732","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a3289dadf6475acaccd6939178edc55ba022db1e767f0016a97076866db6f262","target":"record","created_at":"2026-06-01T01:03:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a92a818082904ecc9cfd52121ac018927b37a5e0da072f19dbac9795bb191d17","cross_cats_sorted":["cs.AI","cs.SE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-23T19:40:51Z","title_canon_sha256":"5f767b424c1af0fdea74e67824bb6478c1ed80b804f4a7841fcad123464b4aec"},"schema_version":"1.0","source":{"id":"2512.20732","kind":"arxiv","version":2}},"canonical_sha256":"711647bf8834ca41ccfbc63df9d027199211136275390bca9324d6bfd895f4f0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"711647bf8834ca41ccfbc63df9d027199211136275390bca9324d6bfd895f4f0","first_computed_at":"2026-06-01T01:03:47.949108Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:03:47.949108Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mlCMmDtiC3NivWFNu2Uhr7qfdayMCiz7oMETTp6+97+RX/A28X3Kk13Z+ZY+d2HbN6+WOkkmoEHjf2B7uaf1Cg==","signature_status":"signed_v1","signed_at":"2026-06-01T01:03:47.950438Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.20732","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a3289dadf6475acaccd6939178edc55ba022db1e767f0016a97076866db6f262","sha256:ad737f1c2cbe0c1c57be71635dfadc217d77b5d2fa896ab636da1b772219d932"],"state_sha256":"586ff107a17df9fe7ea11eab04493430b5a737201c8e18a2e1e813b59a0ba895"}