{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:OPT7NIN3RVP6QVPQYJIJFVPVLM","short_pith_number":"pith:OPT7NIN3","schema_version":"1.0","canonical_sha256":"73e7f6a1bb8d5fe855f0c25092d5f55b333f0eeeefaea1157b2f8e4451e39566","source":{"kind":"arxiv","id":"2606.26990","version":1},"attestation_state":"computed","paper":{"title":"Decision-Aligned Evaluation of Uncertainty Quantification","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Annika Schneider, Joshua Stiller, Tommy Rochussen, Vincent Fortuin","submitted_at":"2026-06-25T13:05:41Z","abstract_excerpt":"Uncertainty estimates in machine learning are typically evaluated using generic metrics such as the negative log-likelihood and expected calibration error, yet good performance on such metrics does not necessarily imply high utility in downstream decisions. We introduce decision-alignment, a criterion that reveals which evaluation metrics meaningfully align with downstream utilities. Applying this framework, we show that many widely used uncertainty metrics are either misaligned with common decision problems or encode pathological prior beliefs about the downstream task. We then propose prior-"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.26990","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T13:05:41Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"60ba039a2043aad6a4e33e3885b8494da549f719eff3ee6914fe735f2332a0af","abstract_canon_sha256":"2bc04689af48b89ecc3a77389c7a58f29db6dc0ba4fd638760ced3f9c1d858d9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:16:06.098241Z","signature_b64":"cDmUYsBDclmEG7Hd4bCFhgdumA0po1BQ+GSgwnAz2BBgd4FKj48H9PBhkHA4pyEPjYYoQcNigveOt/aRU0c2Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"73e7f6a1bb8d5fe855f0c25092d5f55b333f0eeeefaea1157b2f8e4451e39566","last_reissued_at":"2026-06-26T01:16:06.097865Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:16:06.097865Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Decision-Aligned Evaluation of Uncertainty Quantification","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Annika Schneider, Joshua Stiller, Tommy Rochussen, Vincent Fortuin","submitted_at":"2026-06-25T13:05:41Z","abstract_excerpt":"Uncertainty estimates in machine learning are typically evaluated using generic metrics such as the negative log-likelihood and expected calibration error, yet good performance on such metrics does not necessarily imply high utility in downstream decisions. We introduce decision-alignment, a criterion that reveals which evaluation metrics meaningfully align with downstream utilities. Applying this framework, we show that many widely used uncertainty metrics are either misaligned with common decision problems or encode pathological prior beliefs about the downstream task. We then propose prior-"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26990","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.26990/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.26990","created_at":"2026-06-26T01:16:06.097930+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.26990v1","created_at":"2026-06-26T01:16:06.097930+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26990","created_at":"2026-06-26T01:16:06.097930+00:00"},{"alias_kind":"pith_short_12","alias_value":"OPT7NIN3RVP6","created_at":"2026-06-26T01:16:06.097930+00:00"},{"alias_kind":"pith_short_16","alias_value":"OPT7NIN3RVP6QVPQ","created_at":"2026-06-26T01:16:06.097930+00:00"},{"alias_kind":"pith_short_8","alias_value":"OPT7NIN3","created_at":"2026-06-26T01:16:06.097930+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM","json":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM.json","graph_json":"https://pith.science/api/pith-number/OPT7NIN3RVP6QVPQYJIJFVPVLM/graph.json","events_json":"https://pith.science/api/pith-number/OPT7NIN3RVP6QVPQYJIJFVPVLM/events.json","paper":"https://pith.science/paper/OPT7NIN3"},"agent_actions":{"view_html":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM","download_json":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM.json","view_paper":"https://pith.science/paper/OPT7NIN3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.26990&json=true","fetch_graph":"https://pith.science/api/pith-number/OPT7NIN3RVP6QVPQYJIJFVPVLM/graph.json","fetch_events":"https://pith.science/api/pith-number/OPT7NIN3RVP6QVPQYJIJFVPVLM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM/action/storage_attestation","attest_author":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM/action/author_attestation","sign_citation":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM/action/citation_signature","submit_replication":"https://pith.science/pith/OPT7NIN3RVP6QVPQYJIJFVPVLM/action/replication_record"}},"created_at":"2026-06-26T01:16:06.097930+00:00","updated_at":"2026-06-26T01:16:06.097930+00:00"}