{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:YN3ZSWSRUBMUHYXQSOWCYUZXOG","short_pith_number":"pith:YN3ZSWSR","schema_version":"1.0","canonical_sha256":"c377995a51a05943e2f093ac2c533771ab3c30fe064da521cdb09ce7daadd3c8","source":{"kind":"arxiv","id":"1902.00098","version":1},"attestation_state":"computed","paper":{"title":"The Second Conversational Intelligence Challenge (ConvAI2)","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.HC"],"primary_cat":"cs.AI","authors_text":"Alan W Black, Alexander Miller, Alexander Rudnicky, Arthur Szlam, Douwe Kiela, Emily Dinan, Iulian Serban, Jack Urbanek, Jason Weston, Jason Williams, Joelle Pineau, Kurt Shuster, Mikhail Burtsev, Ryan Lowe, Shrimai Prabhumoye, Valentin Malykh, Varvara Logacheva","submitted_at":"2019-01-31T22:14:34Z","abstract_excerpt":"We describe the setting and results of the ConvAI2 NeurIPS competition that aims to further the state-of-the-art in open-domain chatbots. Some key takeaways from the competition are: (i) pretrained Transformer variants are currently the best performing models on this task, (ii) but to improve performance on multi-turn conversations with humans, future systems must go beyond single word metrics like perplexity to measure the performance across sequences of utterances (conversations) -- in terms of repetition, consistency and balance of dialogue acts (e.g. how many questions asked vs. answered)."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.00098","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-01-31T22:14:34Z","cross_cats_sorted":["cs.CL","cs.HC"],"title_canon_sha256":"22d7cf3d1b8eb422d3786dc255ac36ef10a303dc03b5d6067bdd1a38aa94d4eb","abstract_canon_sha256":"8443c9b0b2017ea467d72d3569488787155901909b33ca42812939fd96c1b9ad"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:54:58.277068Z","signature_b64":"DVK5mEyRsDbF6UbdYFujET1urP5ss4N/qXTc3cAWi98LOYsp4W6FRQZkoy8nbB2HwenqaT0wg9KFljC1HTsnDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c377995a51a05943e2f093ac2c533771ab3c30fe064da521cdb09ce7daadd3c8","last_reissued_at":"2026-05-17T23:54:58.276604Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:54:58.276604Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Second Conversational Intelligence Challenge (ConvAI2)","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.HC"],"primary_cat":"cs.AI","authors_text":"Alan W Black, Alexander Miller, Alexander Rudnicky, Arthur Szlam, Douwe Kiela, Emily Dinan, Iulian Serban, Jack Urbanek, Jason Weston, Jason Williams, Joelle Pineau, Kurt Shuster, Mikhail Burtsev, Ryan Lowe, Shrimai Prabhumoye, Valentin Malykh, Varvara Logacheva","submitted_at":"2019-01-31T22:14:34Z","abstract_excerpt":"We describe the setting and results of the ConvAI2 NeurIPS competition that aims to further the state-of-the-art in open-domain chatbots. Some key takeaways from the competition are: (i) pretrained Transformer variants are currently the best performing models on this task, (ii) but to improve performance on multi-turn conversations with humans, future systems must go beyond single word metrics like perplexity to measure the performance across sequences of utterances (conversations) -- in terms of repetition, consistency and balance of dialogue acts (e.g. how many questions asked vs. answered)."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.00098","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.00098","created_at":"2026-05-17T23:54:58.276679+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.00098v1","created_at":"2026-05-17T23:54:58.276679+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.00098","created_at":"2026-05-17T23:54:58.276679+00:00"},{"alias_kind":"pith_short_12","alias_value":"YN3ZSWSRUBMU","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"YN3ZSWSRUBMUHYXQ","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"YN3ZSWSR","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2406.10162","citing_title":"Sycophancy to Subterfuge: Investigating Reward-Tampering in Large Language Models","ref_index":135,"is_internal_anchor":true},{"citing_arxiv_id":"1910.13461","citing_title":"BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension","ref_index":3,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG","json":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG.json","graph_json":"https://pith.science/api/pith-number/YN3ZSWSRUBMUHYXQSOWCYUZXOG/graph.json","events_json":"https://pith.science/api/pith-number/YN3ZSWSRUBMUHYXQSOWCYUZXOG/events.json","paper":"https://pith.science/paper/YN3ZSWSR"},"agent_actions":{"view_html":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG","download_json":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG.json","view_paper":"https://pith.science/paper/YN3ZSWSR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.00098&json=true","fetch_graph":"https://pith.science/api/pith-number/YN3ZSWSRUBMUHYXQSOWCYUZXOG/graph.json","fetch_events":"https://pith.science/api/pith-number/YN3ZSWSRUBMUHYXQSOWCYUZXOG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG/action/storage_attestation","attest_author":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG/action/author_attestation","sign_citation":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG/action/citation_signature","submit_replication":"https://pith.science/pith/YN3ZSWSRUBMUHYXQSOWCYUZXOG/action/replication_record"}},"created_at":"2026-05-17T23:54:58.276679+00:00","updated_at":"2026-05-17T23:54:58.276679+00:00"}