{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2020:5X6JLHOGN2HVWSPONIMAI4JMZX","short_pith_number":"pith:5X6JLHOG","schema_version":"1.0","canonical_sha256":"edfc959dc66e8f5b49ee6a1804712ccdc5c9f93fb11fcb97e071dfa1db53a4a3","source":{"kind":"arxiv","id":"2007.01282","version":2},"attestation_state":"computed","paper":{"title":"Leveraging Passage Retrieval with Generative Models for Open Domain Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Generative models for open-domain question answering gain from retrieving multiple passages and combining their evidence.","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Edouard Grave, Gautier Izacard","submitted_at":"2020-07-02T17:44:57Z","abstract_excerpt":"Generative models for open domain question answering have proven to be competitive, without resorting to external knowledge. While promising, this approach requires to use models with billions of parameters, which are expensive to train and query. In this paper, we investigate how much these models can benefit from retrieving text passages, potentially containing evidence. We obtain state-of-the-art results on the Natural Questions and TriviaQA open benchmarks. Interestingly, we observe that the performance of this method significantly improves when increasing the number of retrieved passages."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":true},"canonical_record":{"source":{"id":"2007.01282","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2020-07-02T17:44:57Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"701ee88276237a38762e765c99c525be5ecd6ae1b0d408730dcaaba6404913b2","abstract_canon_sha256":"77e01979aac7fd9eade2a6e28fb35908563b4982a042002d099aafa2280e1925"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:14.049521Z","signature_b64":"AfhAX0MFpYtJ2B4u1++Ebo7eK2lDoZ38ued5LPDjjx/4dl1GuDT9SrqPQjNLGwlfEMUId8l4EmtBPfvmOkGCAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"edfc959dc66e8f5b49ee6a1804712ccdc5c9f93fb11fcb97e071dfa1db53a4a3","last_reissued_at":"2026-05-17T23:38:14.048603Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:14.048603Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Leveraging Passage Retrieval with Generative Models for Open Domain Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Generative models for open-domain question answering gain from retrieving multiple passages and combining their evidence.","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Edouard Grave, Gautier Izacard","submitted_at":"2020-07-02T17:44:57Z","abstract_excerpt":"Generative models for open domain question answering have proven to be competitive, without resorting to external knowledge. While promising, this approach requires to use models with billions of parameters, which are expensive to train and query. In this paper, we investigate how much these models can benefit from retrieving text passages, potentially containing evidence. We obtain state-of-the-art results on the Natural Questions and TriviaQA open benchmarks. Interestingly, we observe that the performance of this method significantly improves when increasing the number of retrieved passages."},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We obtain state-of-the-art results on the Natural Questions and TriviaQA open benchmarks. Interestingly, we observe that the performance of this method significantly improves when increasing the number of retrieved passages. This is evidence that generative models are good at aggregating and combining evidence from multiple passages.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the observed gains are attributable to the generative model's ability to aggregate evidence across passages rather than to confounding factors such as retrieval quality, prompt formatting, or benchmark-specific artifacts; the abstract provides no controls or ablation details to isolate this mechanism.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Augmenting generative models with passage retrieval yields state-of-the-art results on Natural Questions and TriviaQA, with performance scaling positively as more passages are retrieved.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Generative models for open-domain question answering gain from retrieving multiple passages and combining their evidence.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"6a52f2d0347224fae0afe8a6555442b9015082c8aa97caeccc2e00f2b65eb245"},"source":{"id":"2007.01282","kind":"arxiv","version":2},"verdict":{"id":"02da9189-dbba-4d44-b8a9-f08273771386","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T12:43:04.017029Z","strongest_claim":"We obtain state-of-the-art results on the Natural Questions and TriviaQA open benchmarks. Interestingly, we observe that the performance of this method significantly improves when increasing the number of retrieved passages. This is evidence that generative models are good at aggregating and combining evidence from multiple passages.","one_line_summary":"Augmenting generative models with passage retrieval yields state-of-the-art results on Natural Questions and TriviaQA, with performance scaling positively as more passages are retrieved.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the observed gains are attributable to the generative model's ability to aggregate evidence across passages rather than to confounding factors such as retrieval quality, prompt formatting, or benchmark-specific artifacts; the abstract provides no controls or ablation details to isolate this mechanism.","pith_extraction_headline":"Generative models for open-domain question answering gain from retrieving multiple passages and combining their evidence."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":1,"snapshot_sha256":"8ff57abf21d1c56aa30403874a4ea1b258996024ff81d38e4a624819f885c873"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2007.01282","created_at":"2026-05-17T23:38:14.048752+00:00"},{"alias_kind":"arxiv_version","alias_value":"2007.01282v2","created_at":"2026-05-17T23:38:14.048752+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2007.01282","created_at":"2026-05-17T23:38:14.048752+00:00"},{"alias_kind":"pith_short_12","alias_value":"5X6JLHOGN2HV","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"5X6JLHOGN2HVWSPO","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"5X6JLHOG","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":20,"internal_anchor_count":20,"sample":[{"citing_arxiv_id":"2510.01409","citing_title":"OntoLogX: Ontology-Guided Knowledge Graph Extraction from Cybersecurity Logs with Large Language Models","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2511.09803","citing_title":"Retrieval as a Decision: Training-Free Adaptive Gating for Efficient RAG","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2301.12652","citing_title":"REPLUG: Retrieval-Augmented Black-Box Language Models","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2512.20136","citing_title":"M$^3$KG-RAG: Multi-hop Multimodal Knowledge Graph-enhanced Retrieval-Augmented Generation","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2208.03299","citing_title":"Atlas: Few-shot Learning with Retrieval Augmented Language Models","ref_index":201,"is_internal_anchor":true},{"citing_arxiv_id":"2309.16671","citing_title":"Demystifying CLIP Data","ref_index":101,"is_internal_anchor":true},{"citing_arxiv_id":"2604.20844","citing_title":"AtomicRAG: Atom-Entity Graphs for Retrieval-Augmented Generation","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2004.04906","citing_title":"Dense Passage Retrieval for Open-Domain Question Answering","ref_index":82,"is_internal_anchor":true},{"citing_arxiv_id":"2401.18059","citing_title":"RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval","ref_index":99,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14503","citing_title":"Not All RAGs Are Created Equal: A Component-Wise Empirical Study for Software Engineering Tasks","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2112.09118","citing_title":"Unsupervised Dense Information Retrieval with Contrastive Learning","ref_index":140,"is_internal_anchor":true},{"citing_arxiv_id":"2201.08239","citing_title":"LaMDA: Language Models for Dialog Applications","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.03989","citing_title":"An Agent-Oriented Pluggable Experience-RAG Skill for Experience-Driven Retrieval Strategy Orchestration","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05459","citing_title":"Privacy Without Losing Place: A Paradigm for Private Retrieval in Spatial RAGs","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2604.12610","citing_title":"Transforming External Knowledge into Triplets for Enhanced Retrieval in RAG of LLMs","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07274","citing_title":"A Systematic Study of Retrieval Pipeline Design for Retrieval-Augmented Medical Question Answering","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2310.08560","citing_title":"MemGPT: Towards LLMs as Operating Systems","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2604.15270","citing_title":"Enhancing Large Language Models with Retrieval Augmented Generation for Software Testing and Inspection Automation","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16686","citing_title":"No-Worse Context-Aware Decoding: Preventing Neutral Regression in Context-Conditioned Generation","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.03989","citing_title":"An Agent-Oriented Pluggable Experience-RAG Skill for Experience-Driven Retrieval Strategy Orchestration","ref_index":3,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":1,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX","json":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX.json","graph_json":"https://pith.science/api/pith-number/5X6JLHOGN2HVWSPONIMAI4JMZX/graph.json","events_json":"https://pith.science/api/pith-number/5X6JLHOGN2HVWSPONIMAI4JMZX/events.json","paper":"https://pith.science/paper/5X6JLHOG"},"agent_actions":{"view_html":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX","download_json":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX.json","view_paper":"https://pith.science/paper/5X6JLHOG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2007.01282&json=true","fetch_graph":"https://pith.science/api/pith-number/5X6JLHOGN2HVWSPONIMAI4JMZX/graph.json","fetch_events":"https://pith.science/api/pith-number/5X6JLHOGN2HVWSPONIMAI4JMZX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX/action/storage_attestation","attest_author":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX/action/author_attestation","sign_citation":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX/action/citation_signature","submit_replication":"https://pith.science/pith/5X6JLHOGN2HVWSPONIMAI4JMZX/action/replication_record"}},"created_at":"2026-05-17T23:38:14.048752+00:00","updated_at":"2026-05-17T23:38:14.048752+00:00"}