{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:VK5WG4WG7K2TLAKHVEP4CJ4O6O","short_pith_number":"pith:VK5WG4WG","schema_version":"1.0","canonical_sha256":"aabb6372c6fab5358147a91fc1278ef3a745c7c999079ee64763a757f10371f2","source":{"kind":"arxiv","id":"1512.05742","version":3},"attestation_state":"computed","paper":{"title":"A Survey of Available Corpora for Building Data-Driven Dialogue Systems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.HC","cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Iulian Vlad Serban, Joelle Pineau, Laurent Charlin, Peter Henderson, Ryan Lowe","submitted_at":"2015-12-17T19:52:39Z","abstract_excerpt":"During the past decade, several areas of speech and language understanding have witnessed substantial breakthroughs from the use of data-driven models. In the area of dialogue systems, the trend is less obvious, and most practical systems are still built through significant engineering and expert knowledge. Nevertheless, several recent results suggest that data-driven approaches are feasible and quite promising. To facilitate research in this area, we have carried out a wide survey of publicly available datasets suitable for data-driven learning of dialogue systems. We discuss important charac"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1512.05742","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-12-17T19:52:39Z","cross_cats_sorted":["cs.AI","cs.HC","cs.LG","stat.ML"],"title_canon_sha256":"51085664e59deb8875a3cc458bba6f797787b69d4fedde63fe700d7aac8cfca5","abstract_canon_sha256":"88ab0c824551e788a7138459c47162f51fc244fd0cabedd6f8c0810f5600ab09"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:48:20.398925Z","signature_b64":"Wp6Ogy7rk3fbrt4shLzsqpxv+zkzqN9oIo3bl+UpFRLo477U7O9Hl6Twn3wtFPlyye5FXiZ4F0gkuc9BleMiBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"aabb6372c6fab5358147a91fc1278ef3a745c7c999079ee64763a757f10371f2","last_reissued_at":"2026-05-18T00:48:20.398208Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:48:20.398208Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Survey of Available Corpora for Building Data-Driven Dialogue Systems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.HC","cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Iulian Vlad Serban, Joelle Pineau, Laurent Charlin, Peter Henderson, Ryan Lowe","submitted_at":"2015-12-17T19:52:39Z","abstract_excerpt":"During the past decade, several areas of speech and language understanding have witnessed substantial breakthroughs from the use of data-driven models. In the area of dialogue systems, the trend is less obvious, and most practical systems are still built through significant engineering and expert knowledge. Nevertheless, several recent results suggest that data-driven approaches are feasible and quite promising. To facilitate research in this area, we have carried out a wide survey of publicly available datasets suitable for data-driven learning of dialogue systems. We discuss important charac"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.05742","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1512.05742","created_at":"2026-05-18T00:48:20.398311+00:00"},{"alias_kind":"arxiv_version","alias_value":"1512.05742v3","created_at":"2026-05-18T00:48:20.398311+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.05742","created_at":"2026-05-18T00:48:20.398311+00:00"},{"alias_kind":"pith_short_12","alias_value":"VK5WG4WG7K2T","created_at":"2026-05-18T12:29:44.643036+00:00"},{"alias_kind":"pith_short_16","alias_value":"VK5WG4WG7K2TLAKH","created_at":"2026-05-18T12:29:44.643036+00:00"},{"alias_kind":"pith_short_8","alias_value":"VK5WG4WG","created_at":"2026-05-18T12:29:44.643036+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.23235","citing_title":"Convex Low-resource Accent-Robust Language Detection in Speech Recognition","ref_index":21,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O","json":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O.json","graph_json":"https://pith.science/api/pith-number/VK5WG4WG7K2TLAKHVEP4CJ4O6O/graph.json","events_json":"https://pith.science/api/pith-number/VK5WG4WG7K2TLAKHVEP4CJ4O6O/events.json","paper":"https://pith.science/paper/VK5WG4WG"},"agent_actions":{"view_html":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O","download_json":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O.json","view_paper":"https://pith.science/paper/VK5WG4WG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1512.05742&json=true","fetch_graph":"https://pith.science/api/pith-number/VK5WG4WG7K2TLAKHVEP4CJ4O6O/graph.json","fetch_events":"https://pith.science/api/pith-number/VK5WG4WG7K2TLAKHVEP4CJ4O6O/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O/action/storage_attestation","attest_author":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O/action/author_attestation","sign_citation":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O/action/citation_signature","submit_replication":"https://pith.science/pith/VK5WG4WG7K2TLAKHVEP4CJ4O6O/action/replication_record"}},"created_at":"2026-05-18T00:48:20.398311+00:00","updated_at":"2026-05-18T00:48:20.398311+00:00"}