{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:J2YYL3X2BSG2YETVWNWX2QASPM","short_pith_number":"pith:J2YYL3X2","schema_version":"1.0","canonical_sha256":"4eb185eefa0c8dac1275b36d7d40127b1f058219e31da2e9679f5b0792dccaaf","source":{"kind":"arxiv","id":"2604.19047","version":2},"attestation_state":"computed","paper":{"title":"RARE: Redundancy-Aware Retrieval Evaluation Framework for High-Similarity Corpora","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Standard retrieval benchmarks fail to capture performance drops in redundant real-world corpora.","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.CL","authors_text":"Hanjun Cho, Jay-Yoon Lee","submitted_at":"2026-04-21T03:54:09Z","abstract_excerpt":"Existing QA benchmarks typically assume distinct documents with minimal overlap, yet real-world retrieval-augmented generation (RAG) systems operate on corpora such as financial reports, legal codes, and patents, where information is highly redundant and documents exhibit strong inter-document similarity. This mismatch undermines evaluation validity: retrievers can be unfairly undervalued even when they retrieve documents that provide sufficient evidence, because redundancy across documents is not accounted for in evaluation. On the other hand, retrievers that perform well on standard benchmar"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2604.19047","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-21T03:54:09Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"1d6b3bf8bbd3be524d6d28e9b81815ee70d6460c098abce447437685cdc4320f","abstract_canon_sha256":"1d973c9ad0e87f00d61f23e9725a31f54da424ef76c8bff21d1427388862c475"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-01T01:17:51.018262Z","signature_b64":"ayniYN6EpyzQbJW3Jv9tvJSLYhhhr6QVko/79u/zsc6H9ITTiCsnyEnQm6hrJevBCBW+bAv8qG0q9BKUPBuKCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4eb185eefa0c8dac1275b36d7d40127b1f058219e31da2e9679f5b0792dccaaf","last_reissued_at":"2026-07-01T01:17:51.017809Z","signature_status":"signed_v1","first_computed_at":"2026-07-01T01:17:51.017809Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RARE: Redundancy-Aware Retrieval Evaluation Framework for High-Similarity Corpora","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Standard retrieval benchmarks fail to capture performance drops in redundant real-world corpora.","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.CL","authors_text":"Hanjun Cho, Jay-Yoon Lee","submitted_at":"2026-04-21T03:54:09Z","abstract_excerpt":"Existing QA benchmarks typically assume distinct documents with minimal overlap, yet real-world retrieval-augmented generation (RAG) systems operate on corpora such as financial reports, legal codes, and patents, where information is highly redundant and documents exhibit strong inter-document similarity. This mismatch undermines evaluation validity: retrievers can be unfairly undervalued even when they retrieve documents that provide sufficient evidence, because redundancy across documents is not accounted for in evaluation. On the other hand, retrievers that perform well on standard benchmar"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"a strong retriever baseline drops from 66.4% PerfRecall@10 on 4-hop General-Wiki to 5.0-27.9% PerfRecall@10 at 4-hop depth, revealing robustness gaps that current benchmarks fail to capture.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That decomposing documents into atomic facts plus CRRF rank-fusion produces benchmark data that faithfully reflects real-world redundancy and quality without introducing new artifacts or biases.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"RARE builds redundancy-aware benchmarks via atomic fact decomposition and CRRF-enhanced LLM generation, showing retriever PerfRecall@10 dropping from 66.4% on general data to 5.0-27.9% on high-similarity finance/legal/patent corpora.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Standard retrieval benchmarks fail to capture performance drops in redundant real-world corpora.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"1fddaed2005d8e704feb3d4a509026b2dc491639b6a0d86a38c190729dab4c7e"},"source":{"id":"2604.19047","kind":"arxiv","version":2},"verdict":{"id":"ec884627-c776-402f-8887-a4e4eccbcf71","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T02:28:25.779594Z","strongest_claim":"a strong retriever baseline drops from 66.4% PerfRecall@10 on 4-hop General-Wiki to 5.0-27.9% PerfRecall@10 at 4-hop depth, revealing robustness gaps that current benchmarks fail to capture.","one_line_summary":"RARE builds redundancy-aware benchmarks via atomic fact decomposition and CRRF-enhanced LLM generation, showing retriever PerfRecall@10 dropping from 66.4% on general data to 5.0-27.9% on high-similarity finance/legal/patent corpora.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That decomposing documents into atomic facts plus CRRF rank-fusion produces benchmark data that faithfully reflects real-world redundancy and quality without introducing new artifacts or biases.","pith_extraction_headline":"Standard retrieval benchmarks fail to capture performance drops in redundant real-world corpora."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.19047/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-20T03:23:56.161034Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"959df375be06dd87aef4dc7e4695f383b49e407e175130d67ad4b910e55e763d"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.19047","created_at":"2026-07-01T01:17:51.017866+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.19047v2","created_at":"2026-07-01T01:17:51.017866+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.19047","created_at":"2026-07-01T01:17:51.017866+00:00"},{"alias_kind":"pith_short_12","alias_value":"J2YYL3X2BSG2","created_at":"2026-07-01T01:17:51.017866+00:00"},{"alias_kind":"pith_short_16","alias_value":"J2YYL3X2BSG2YETV","created_at":"2026-07-01T01:17:51.017866+00:00"},{"alias_kind":"pith_short_8","alias_value":"J2YYL3X2","created_at":"2026-07-01T01:17:51.017866+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2606.13204","citing_title":"CoDeR: Local Constraint-Compatible Retrieval Beyond Semantic Similarity","ref_index":9,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM","json":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM.json","graph_json":"https://pith.science/api/pith-number/J2YYL3X2BSG2YETVWNWX2QASPM/graph.json","events_json":"https://pith.science/api/pith-number/J2YYL3X2BSG2YETVWNWX2QASPM/events.json","paper":"https://pith.science/paper/J2YYL3X2"},"agent_actions":{"view_html":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM","download_json":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM.json","view_paper":"https://pith.science/paper/J2YYL3X2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.19047&json=true","fetch_graph":"https://pith.science/api/pith-number/J2YYL3X2BSG2YETVWNWX2QASPM/graph.json","fetch_events":"https://pith.science/api/pith-number/J2YYL3X2BSG2YETVWNWX2QASPM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM/action/storage_attestation","attest_author":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM/action/author_attestation","sign_citation":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM/action/citation_signature","submit_replication":"https://pith.science/pith/J2YYL3X2BSG2YETVWNWX2QASPM/action/replication_record"}},"created_at":"2026-07-01T01:17:51.017866+00:00","updated_at":"2026-07-01T01:17:51.017866+00:00"}