{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:FCKZ6JJKVWJM35PNRHLWIGEFAZ","short_pith_number":"pith:FCKZ6JJK","canonical_record":{"source":{"id":"2604.22891","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-24T09:46:22Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"4d4d23e09121506db1381c0fccf13bf15848e72874277c48229f8414b8ea0d8d","abstract_canon_sha256":"0e61c9836c571c203405ee11af53a5bc9430944ca9704f8f9f98e09b091f3d38"},"schema_version":"1.0"},"canonical_sha256":"28959f252aad92cdf5ed89d76418850669e7c6d48051135a8709401c7fa1e9f0","source":{"kind":"arxiv","id":"2604.22891","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.22891","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"arxiv_version","alias_value":"2604.22891v4","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.22891","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"pith_short_12","alias_value":"FCKZ6JJKVWJM","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"pith_short_16","alias_value":"FCKZ6JJKVWJM35PN","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"pith_short_8","alias_value":"FCKZ6JJK","created_at":"2026-06-03T01:05:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:FCKZ6JJKVWJM35PNRHLWIGEFAZ","target":"record","payload":{"canonical_record":{"source":{"id":"2604.22891","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-24T09:46:22Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"4d4d23e09121506db1381c0fccf13bf15848e72874277c48229f8414b8ea0d8d","abstract_canon_sha256":"0e61c9836c571c203405ee11af53a5bc9430944ca9704f8f9f98e09b091f3d38"},"schema_version":"1.0"},"canonical_sha256":"28959f252aad92cdf5ed89d76418850669e7c6d48051135a8709401c7fa1e9f0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T01:05:50.629138Z","signature_b64":"anFw7dDRWJqjmFOjat6iCt5U2YA37jzeBsWLVucfqIOMagJetkgenveubXfk77Q6gq0z/1ImlsgW5bL4ro7SBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"28959f252aad92cdf5ed89d76418850669e7c6d48051135a8709401c7fa1e9f0","last_reissued_at":"2026-06-03T01:05:50.628659Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T01:05:50.628659Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.22891","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T01:05:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"24KyTYiivIKVyG1wKTD5oZT3Q6uaPQMx8mZSwTaoGHkLpi7YVOhTdjdrSSMcmeIzf2Uf77X9G7sy/D6SVeY1Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T12:43:44.787334Z"},"content_sha256":"929ba211afecf093e00df8473c31b6db863a55a486de1c4e7f85106866c4c02a","schema_version":"1.0","event_id":"sha256:929ba211afecf093e00df8473c31b6db863a55a486de1c4e7f85106866c4c02a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:FCKZ6JJKVWJM35PNRHLWIGEFAZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Quantifying and Mitigating Self-Preference Bias of LLM Judges","license":"http://creativecommons.org/licenses/by/4.0/","headline":"LLM judges show self-preference bias uncorrelated with capability, but a multi-dimensional strategy reduces it by 31.5 percent.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Chuxian Qiu, Jinming Yang, Tao Zhou, Xinshan Jiao, Zheng Hu, Zhenyu Deng","submitted_at":"2026-04-24T09:46:22Z","abstract_excerpt":"LLM-as-a-Judge has become a dominant approach in automated evaluation systems, playing critical roles in model alignment, leaderboard construction, quality control, and so on. However, the scalability and trustworthiness of this approach can be substantially distorted by Self-Preference Bias (SPB), which is a directional evaluative deviation in which LLMs systematically favor or disfavor their own generated outputs during evaluation. Existing measurements rely on costly human annotations and conflate generative capability with evaluative stance, and thus are impractical for large-scale deploym"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Empirical analysis across 20 mainstream LLMs reveals that advanced capabilities are often uncorrelated, or even negatively correlated, with low SPB. To mitigate this bias, we propose a structured multi-dimensional evaluation strategy grounded in cognitive load decomposition, which reduces SPB by 31.5% on average.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The constructed pairs of responses truly have negligible quality differences, allowing statistical separation of bias propensity from genuine discriminability without human gold standards.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"An automated framework using equal-quality response pairs quantifies self-preference bias in LLM judges and reduces it by 31.5% via a cognitive-load-based multi-dimensional evaluation strategy.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"LLM judges show self-preference bias uncorrelated with capability, but a multi-dimensional strategy reduces it by 31.5 percent.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a167c70a3077f36031caee4d4acd9393f52c8bba5ce44ba1e2613521e2b0f0ff"},"source":{"id":"2604.22891","kind":"arxiv","version":4},"verdict":{"id":"88a33dcb-711c-4a16-a31a-66366c4fd3e3","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T06:54:19.298185Z","strongest_claim":"Empirical analysis across 20 mainstream LLMs reveals that advanced capabilities are often uncorrelated, or even negatively correlated, with low SPB. To mitigate this bias, we propose a structured multi-dimensional evaluation strategy grounded in cognitive load decomposition, which reduces SPB by 31.5% on average.","one_line_summary":"An automated framework using equal-quality response pairs quantifies self-preference bias in LLM judges and reduces it by 31.5% via a cognitive-load-based multi-dimensional evaluation strategy.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The constructed pairs of responses truly have negligible quality differences, allowing statistical separation of bias propensity from genuine discriminability without human gold standards.","pith_extraction_headline":"LLM judges show self-preference bias uncorrelated with capability, but a multi-dimensional strategy reduces it by 31.5 percent."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.22891/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-21T10:40:42.222700Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-20T00:03:43.269957Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"589ca057d2b4234ead7747e599cc906079c1b256f655a034c2f00be06773a11f"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"deb48011fd6bc73ae0395fafe90c49fc9a00f844ad3947a049742519e4ce9b99"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"88a33dcb-711c-4a16-a31a-66366c4fd3e3"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T01:05:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"f9G+pxcNOSfkOY7FXYb1NTS2YfEv1VAk+5YXRY9r4j9O5s2X1bVRHInqRPKms2UZINcOVvWe27J3b1lnpkKhAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T12:43:44.787822Z"},"content_sha256":"473c11ea83d2f2a2bfddfc9629931d8c8a91114e6d2aa6304cfd5c28de1a5351","schema_version":"1.0","event_id":"sha256:473c11ea83d2f2a2bfddfc9629931d8c8a91114e6d2aa6304cfd5c28de1a5351"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FCKZ6JJKVWJM35PNRHLWIGEFAZ/bundle.json","state_url":"https://pith.science/pith/FCKZ6JJKVWJM35PNRHLWIGEFAZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FCKZ6JJKVWJM35PNRHLWIGEFAZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T12:43:44Z","links":{"resolver":"https://pith.science/pith/FCKZ6JJKVWJM35PNRHLWIGEFAZ","bundle":"https://pith.science/pith/FCKZ6JJKVWJM35PNRHLWIGEFAZ/bundle.json","state":"https://pith.science/pith/FCKZ6JJKVWJM35PNRHLWIGEFAZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FCKZ6JJKVWJM35PNRHLWIGEFAZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:FCKZ6JJKVWJM35PNRHLWIGEFAZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0e61c9836c571c203405ee11af53a5bc9430944ca9704f8f9f98e09b091f3d38","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-24T09:46:22Z","title_canon_sha256":"4d4d23e09121506db1381c0fccf13bf15848e72874277c48229f8414b8ea0d8d"},"schema_version":"1.0","source":{"id":"2604.22891","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.22891","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"arxiv_version","alias_value":"2604.22891v4","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.22891","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"pith_short_12","alias_value":"FCKZ6JJKVWJM","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"pith_short_16","alias_value":"FCKZ6JJKVWJM35PN","created_at":"2026-06-03T01:05:50Z"},{"alias_kind":"pith_short_8","alias_value":"FCKZ6JJK","created_at":"2026-06-03T01:05:50Z"}],"graph_snapshots":[{"event_id":"sha256:473c11ea83d2f2a2bfddfc9629931d8c8a91114e6d2aa6304cfd5c28de1a5351","target":"graph","created_at":"2026-06-03T01:05:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Empirical analysis across 20 mainstream LLMs reveals that advanced capabilities are often uncorrelated, or even negatively correlated, with low SPB. To mitigate this bias, we propose a structured multi-dimensional evaluation strategy grounded in cognitive load decomposition, which reduces SPB by 31.5% on average."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The constructed pairs of responses truly have negligible quality differences, allowing statistical separation of bias propensity from genuine discriminability without human gold standards."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"An automated framework using equal-quality response pairs quantifies self-preference bias in LLM judges and reduces it by 31.5% via a cognitive-load-based multi-dimensional evaluation strategy."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"LLM judges show self-preference bias uncorrelated with capability, but a multi-dimensional strategy reduces it by 31.5 percent."}],"snapshot_sha256":"a167c70a3077f36031caee4d4acd9393f52c8bba5ce44ba1e2613521e2b0f0ff"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"deb48011fd6bc73ae0395fafe90c49fc9a00f844ad3947a049742519e4ce9b99"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-21T10:40:42.222700Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-20T00:03:43.269957Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2604.22891/integrity.json","findings":[],"snapshot_sha256":"589ca057d2b4234ead7747e599cc906079c1b256f655a034c2f00be06773a11f","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"LLM-as-a-Judge has become a dominant approach in automated evaluation systems, playing critical roles in model alignment, leaderboard construction, quality control, and so on. However, the scalability and trustworthiness of this approach can be substantially distorted by Self-Preference Bias (SPB), which is a directional evaluative deviation in which LLMs systematically favor or disfavor their own generated outputs during evaluation. Existing measurements rely on costly human annotations and conflate generative capability with evaluative stance, and thus are impractical for large-scale deploym","authors_text":"Chuxian Qiu, Jinming Yang, Tao Zhou, Xinshan Jiao, Zheng Hu, Zhenyu Deng","cross_cats":["cs.AI","cs.CL"],"headline":"LLM judges show self-preference bias uncorrelated with capability, but a multi-dimensional strategy reduces it by 31.5 percent.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-24T09:46:22Z","title":"Quantifying and Mitigating Self-Preference Bias of LLM Judges"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.22891","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-15T06:54:19.298185Z","id":"88a33dcb-711c-4a16-a31a-66366c4fd3e3","model_set":{"reader":"grok-4.3"},"one_line_summary":"An automated framework using equal-quality response pairs quantifies self-preference bias in LLM judges and reduces it by 31.5% via a cognitive-load-based multi-dimensional evaluation strategy.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"LLM judges show self-preference bias uncorrelated with capability, but a multi-dimensional strategy reduces it by 31.5 percent.","strongest_claim":"Empirical analysis across 20 mainstream LLMs reveals that advanced capabilities are often uncorrelated, or even negatively correlated, with low SPB. To mitigate this bias, we propose a structured multi-dimensional evaluation strategy grounded in cognitive load decomposition, which reduces SPB by 31.5% on average.","weakest_assumption":"The constructed pairs of responses truly have negligible quality differences, allowing statistical separation of bias propensity from genuine discriminability without human gold standards."}},"verdict_id":"88a33dcb-711c-4a16-a31a-66366c4fd3e3"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:929ba211afecf093e00df8473c31b6db863a55a486de1c4e7f85106866c4c02a","target":"record","created_at":"2026-06-03T01:05:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0e61c9836c571c203405ee11af53a5bc9430944ca9704f8f9f98e09b091f3d38","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-04-24T09:46:22Z","title_canon_sha256":"4d4d23e09121506db1381c0fccf13bf15848e72874277c48229f8414b8ea0d8d"},"schema_version":"1.0","source":{"id":"2604.22891","kind":"arxiv","version":4}},"canonical_sha256":"28959f252aad92cdf5ed89d76418850669e7c6d48051135a8709401c7fa1e9f0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"28959f252aad92cdf5ed89d76418850669e7c6d48051135a8709401c7fa1e9f0","first_computed_at":"2026-06-03T01:05:50.628659Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T01:05:50.628659Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"anFw7dDRWJqjmFOjat6iCt5U2YA37jzeBsWLVucfqIOMagJetkgenveubXfk77Q6gq0z/1ImlsgW5bL4ro7SBw==","signature_status":"signed_v1","signed_at":"2026-06-03T01:05:50.629138Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.22891","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:929ba211afecf093e00df8473c31b6db863a55a486de1c4e7f85106866c4c02a","sha256:473c11ea83d2f2a2bfddfc9629931d8c8a91114e6d2aa6304cfd5c28de1a5351"],"state_sha256":"36e5514cbee7b557ad5bccfdbadf9ceda370f34a945364fc6e6291b88f9e8965"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DKQAYGtaKD+XqsKhMAOsmIamPEwjdowltnCzqOibAj0MuPt9v21zBPKrgNUeOiOUbNp/gV2GD/FLvgRo4s7pAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T12:43:44.790531Z","bundle_sha256":"cdb5784838046d5e4cf3a4bd1e77e4133014e12e152843695d25065c33a0cbe8"}}