{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:Q7V5FN7RXCIV24AHCJCX3ONEAQ","short_pith_number":"pith:Q7V5FN7R","schema_version":"1.0","canonical_sha256":"87ebd2b7f1b8915d700712457db9a4043e4e6659d99d581b65f33fcd193ffa01","source":{"kind":"arxiv","id":"1707.03904","version":2},"attestation_state":"computed","paper":{"title":"Quasar: Datasets for Question Answering by Search and Reading","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.CL","authors_text":"Bhuwan Dhingra, Kathryn Mazaitis, William W. Cohen","submitted_at":"2017-07-12T20:53:26Z","abstract_excerpt":"We present two new large-scale datasets aimed at evaluating systems designed to comprehend a natural language query and extract its answer from a large corpus of text. The Quasar-S dataset consists of 37000 cloze-style (fill-in-the-gap) queries constructed from definitions of software entity tags on the popular website Stack Overflow. The posts and comments on the website serve as the background corpus for answering the cloze questions. The Quasar-T dataset consists of 43000 open-domain trivia questions and their answers obtained from various internet sources. ClueWeb09 serves as the backgroun"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1707.03904","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-12T20:53:26Z","cross_cats_sorted":["cs.IR","cs.LG"],"title_canon_sha256":"f23315ddcbab960c7b25b35113298948c11f4d19075bdfb23684c47beaeab80a","abstract_canon_sha256":"60eebe7c774ca761f57799818092accfbc90ef8bc17275c276a5de410ba14c46"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:38:20.433624Z","signature_b64":"yVEsQ4SAn+BlhzFf9I9ovapSP4ySdwA0Ux5CxsSgrELwVB/e4v8aUnQmd0DK4ZiwxgdNgDIzcf79fmBYJ621Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"87ebd2b7f1b8915d700712457db9a4043e4e6659d99d581b65f33fcd193ffa01","last_reissued_at":"2026-05-18T00:38:20.433037Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:38:20.433037Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Quasar: Datasets for Question Answering by Search and Reading","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG"],"primary_cat":"cs.CL","authors_text":"Bhuwan Dhingra, Kathryn Mazaitis, William W. Cohen","submitted_at":"2017-07-12T20:53:26Z","abstract_excerpt":"We present two new large-scale datasets aimed at evaluating systems designed to comprehend a natural language query and extract its answer from a large corpus of text. The Quasar-S dataset consists of 37000 cloze-style (fill-in-the-gap) queries constructed from definitions of software entity tags on the popular website Stack Overflow. The posts and comments on the website serve as the background corpus for answering the cloze questions. The Quasar-T dataset consists of 43000 open-domain trivia questions and their answers obtained from various internet sources. ClueWeb09 serves as the backgroun"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.03904","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1707.03904","created_at":"2026-05-18T00:38:20.433127+00:00"},{"alias_kind":"arxiv_version","alias_value":"1707.03904v2","created_at":"2026-05-18T00:38:20.433127+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.03904","created_at":"2026-05-18T00:38:20.433127+00:00"},{"alias_kind":"pith_short_12","alias_value":"Q7V5FN7RXCIV","created_at":"2026-05-18T12:31:37.085036+00:00"},{"alias_kind":"pith_short_16","alias_value":"Q7V5FN7RXCIV24AH","created_at":"2026-05-18T12:31:37.085036+00:00"},{"alias_kind":"pith_short_8","alias_value":"Q7V5FN7R","created_at":"2026-05-18T12:31:37.085036+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":7,"sample":[{"citing_arxiv_id":"2110.01552","citing_title":"Perhaps PTLMs Should Go to School -- A Task to Assess Open Book and Closed Book QA","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2505.18853","citing_title":"Smoothie: Smoothing Diffusion on Token Embeddings for Text Generation","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20199","citing_title":"FlowLM: Few-Step Language Modeling via Diffusion-to-Flow Adaptation","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2210.08933","citing_title":"DiffuSeq: Sequence to Sequence Text Generation with Diffusion Models","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14531","citing_title":"Language Generation as Optimal Control: Closed-Loop Diffusion in Latent Control Space","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2305.15717","citing_title":"The False Promise of Imitating Proprietary LLMs","ref_index":210,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14531","citing_title":"Language Generation as Optimal Control: Closed-Loop Diffusion in Latent Control Space","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"1901.04085","citing_title":"Passage Re-ranking with BERT","ref_index":4,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ","json":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ.json","graph_json":"https://pith.science/api/pith-number/Q7V5FN7RXCIV24AHCJCX3ONEAQ/graph.json","events_json":"https://pith.science/api/pith-number/Q7V5FN7RXCIV24AHCJCX3ONEAQ/events.json","paper":"https://pith.science/paper/Q7V5FN7R"},"agent_actions":{"view_html":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ","download_json":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ.json","view_paper":"https://pith.science/paper/Q7V5FN7R","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1707.03904&json=true","fetch_graph":"https://pith.science/api/pith-number/Q7V5FN7RXCIV24AHCJCX3ONEAQ/graph.json","fetch_events":"https://pith.science/api/pith-number/Q7V5FN7RXCIV24AHCJCX3ONEAQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ/action/storage_attestation","attest_author":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ/action/author_attestation","sign_citation":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ/action/citation_signature","submit_replication":"https://pith.science/pith/Q7V5FN7RXCIV24AHCJCX3ONEAQ/action/replication_record"}},"created_at":"2026-05-18T00:38:20.433127+00:00","updated_at":"2026-05-18T00:38:20.433127+00:00"}