{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:S5UME6L36M5U6EHALQZWXJWWIJ","short_pith_number":"pith:S5UME6L3","canonical_record":{"source":{"id":"2606.24596","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T13:53:50Z","cross_cats_sorted":[],"title_canon_sha256":"762e14629aec15a742ef790ecb341ca8274a17f90d3257b780c71ba6d557b56a","abstract_canon_sha256":"3347d0cedd1b2c4822a2bed7164649b2f38c4c0090a9d30b3865f5e27ce6318a"},"schema_version":"1.0"},"canonical_sha256":"9768c2797bf33b4f10e05c336ba6d642729feebca063dbccbb433de6a49dcb28","source":{"kind":"arxiv","id":"2606.24596","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24596","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24596v1","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24596","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_12","alias_value":"S5UME6L36M5U","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_16","alias_value":"S5UME6L36M5U6EHA","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_8","alias_value":"S5UME6L3","created_at":"2026-06-24T01:15:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:S5UME6L36M5U6EHALQZWXJWWIJ","target":"record","payload":{"canonical_record":{"source":{"id":"2606.24596","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T13:53:50Z","cross_cats_sorted":[],"title_canon_sha256":"762e14629aec15a742ef790ecb341ca8274a17f90d3257b780c71ba6d557b56a","abstract_canon_sha256":"3347d0cedd1b2c4822a2bed7164649b2f38c4c0090a9d30b3865f5e27ce6318a"},"schema_version":"1.0"},"canonical_sha256":"9768c2797bf33b4f10e05c336ba6d642729feebca063dbccbb433de6a49dcb28","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:15:36.498121Z","signature_b64":"ynnc4hqIXtiIoeg1IeH5oBeWHNzYEqbU8RjCdivJeDZ9rkV7yZjBBsSLix9CHilgpCb9kEzSy6vCwUXEuhgwCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9768c2797bf33b4f10e05c336ba6d642729feebca063dbccbb433de6a49dcb28","last_reissued_at":"2026-06-24T01:15:36.497720Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:15:36.497720Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.24596","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2lMCik3+NiKTX26cGKgtstys8CWTZpHVZ8qKJdwJ3eZaD91+FWEpjZdh5anU0HD9i0yCXNb2OycANjiUo0fNDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T21:26:35.606677Z"},"content_sha256":"f075d35e02f2d1245c5b0e2870843adb50d82b81d41f37c474132287910cb701","schema_version":"1.0","event_id":"sha256:f075d35e02f2d1245c5b0e2870843adb50d82b81d41f37c474132287910cb701"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:S5UME6L36M5U6EHALQZWXJWWIJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"To Compare, or Not to Compare: On Methodological Practices in Evaluating Social Bias","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Federico Marcuzzi, Iryna Gurevych, Roy Schwartz, Xuefei Ning","submitted_at":"2026-06-23T13:53:50Z","abstract_excerpt":"As Large Language Models are increasingly deployed in critical applications, robustly evaluating their social biases is paramount. However, the current literature suffers from widespread methodological fragmentation, which yields contradictory conclusions. This stems largely from ignoring the structural framing of benchmark-level evaluations. To resolve this, we introduce a unified and controllable framework that standardizes heterogeneous benchmarks to systematically contrast isolated demographic assessments with forced-choice comparative settings. Crucially, this allows us to disentangle the"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24596","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.24596/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RzNIoeWbX6Q4+2bQ3RVQTMGMhygt9YGQ0LFe7yBntvTKebHX6K6amyHUO0d1LuDgq9K7YMWMW/6+dMQRlmSQDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T21:26:35.607044Z"},"content_sha256":"98289d3b6a2a35721a8786fb2589b9ca404a5f1a3e4f9292053d8df41a29cdb9","schema_version":"1.0","event_id":"sha256:98289d3b6a2a35721a8786fb2589b9ca404a5f1a3e4f9292053d8df41a29cdb9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/S5UME6L36M5U6EHALQZWXJWWIJ/bundle.json","state_url":"https://pith.science/pith/S5UME6L36M5U6EHALQZWXJWWIJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/S5UME6L36M5U6EHALQZWXJWWIJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T21:26:35Z","links":{"resolver":"https://pith.science/pith/S5UME6L36M5U6EHALQZWXJWWIJ","bundle":"https://pith.science/pith/S5UME6L36M5U6EHALQZWXJWWIJ/bundle.json","state":"https://pith.science/pith/S5UME6L36M5U6EHALQZWXJWWIJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/S5UME6L36M5U6EHALQZWXJWWIJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:S5UME6L36M5U6EHALQZWXJWWIJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3347d0cedd1b2c4822a2bed7164649b2f38c4c0090a9d30b3865f5e27ce6318a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T13:53:50Z","title_canon_sha256":"762e14629aec15a742ef790ecb341ca8274a17f90d3257b780c71ba6d557b56a"},"schema_version":"1.0","source":{"id":"2606.24596","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24596","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24596v1","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24596","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_12","alias_value":"S5UME6L36M5U","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_16","alias_value":"S5UME6L36M5U6EHA","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_8","alias_value":"S5UME6L3","created_at":"2026-06-24T01:15:36Z"}],"graph_snapshots":[{"event_id":"sha256:98289d3b6a2a35721a8786fb2589b9ca404a5f1a3e4f9292053d8df41a29cdb9","target":"graph","created_at":"2026-06-24T01:15:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.24596/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As Large Language Models are increasingly deployed in critical applications, robustly evaluating their social biases is paramount. However, the current literature suffers from widespread methodological fragmentation, which yields contradictory conclusions. This stems largely from ignoring the structural framing of benchmark-level evaluations. To resolve this, we introduce a unified and controllable framework that standardizes heterogeneous benchmarks to systematically contrast isolated demographic assessments with forced-choice comparative settings. Crucially, this allows us to disentangle the","authors_text":"Federico Marcuzzi, Iryna Gurevych, Roy Schwartz, Xuefei Ning","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T13:53:50Z","title":"To Compare, or Not to Compare: On Methodological Practices in Evaluating Social Bias"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24596","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f075d35e02f2d1245c5b0e2870843adb50d82b81d41f37c474132287910cb701","target":"record","created_at":"2026-06-24T01:15:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3347d0cedd1b2c4822a2bed7164649b2f38c4c0090a9d30b3865f5e27ce6318a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T13:53:50Z","title_canon_sha256":"762e14629aec15a742ef790ecb341ca8274a17f90d3257b780c71ba6d557b56a"},"schema_version":"1.0","source":{"id":"2606.24596","kind":"arxiv","version":1}},"canonical_sha256":"9768c2797bf33b4f10e05c336ba6d642729feebca063dbccbb433de6a49dcb28","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9768c2797bf33b4f10e05c336ba6d642729feebca063dbccbb433de6a49dcb28","first_computed_at":"2026-06-24T01:15:36.497720Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T01:15:36.497720Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ynnc4hqIXtiIoeg1IeH5oBeWHNzYEqbU8RjCdivJeDZ9rkV7yZjBBsSLix9CHilgpCb9kEzSy6vCwUXEuhgwCQ==","signature_status":"signed_v1","signed_at":"2026-06-24T01:15:36.498121Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.24596","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f075d35e02f2d1245c5b0e2870843adb50d82b81d41f37c474132287910cb701","sha256:98289d3b6a2a35721a8786fb2589b9ca404a5f1a3e4f9292053d8df41a29cdb9"],"state_sha256":"0bb584654d3e4d6534f9fe495a49d2582583e43558873771a9d5802c470bfd89"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nbO9mdu1rfXJGh/Bo4ofrCLWSa0XA4ZvvdpEL9tWFCIau8OlwE5k1QOd0yh4UZXKF71Ns1cIbvZSvMRc0lEDBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T21:26:35.609047Z","bundle_sha256":"a2089bbd70bca3494cea6fe17ee7b35c33a11c1895c86f70bd936215aa3f88b6"}}