{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:YABOGVDYH2XPTG5O6EWRSCESDK","short_pith_number":"pith:YABOGVDY","schema_version":"1.0","canonical_sha256":"c002e354783eaef99baef12d1908921a969968ff898e89b274f90877d54d6c20","source":{"kind":"arxiv","id":"2405.13003","version":2},"attestation_state":"computed","paper":{"title":"A Survey on Recent Advances in Conversational Data Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.CL","authors_text":"Evangelos Kanoulas, Faegheh Hasibi, Heydar Soudani, Roxana Petcu","submitted_at":"2024-05-12T10:11:12Z","abstract_excerpt":"Recent advancements in conversational systems have significantly enhanced human-machine interactions across various domains. However, training these systems is challenging due to the scarcity of specialized dialogue data. Traditionally, conversational datasets were created through crowdsourcing, but this method has proven costly, limited in scale, and labor-intensive. As a solution, the development of synthetic dialogue data has emerged, utilizing techniques to augment existing datasets or convert textual resources into conversational formats, providing a more efficient and scalable approach t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2405.13003","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-05-12T10:11:12Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"f8a4d07cf2b579c568bdb69cd9e8e3524f600837a092cb6148f0621678b011d9","abstract_canon_sha256":"7c2f439ff499548e8f324f433a7ff449bdf7f65083de2cfa3a9583223dded9ba"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T02:05:32.494954Z","signature_b64":"9IoSg+7nhZ1JkXf6r94AAufbrSURXS2bygf7rPlqRYHaLDf9hXvNQEsFsOes4WR5Fu9kliVXyYD5QdcDDgd0AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c002e354783eaef99baef12d1908921a969968ff898e89b274f90877d54d6c20","last_reissued_at":"2026-05-29T02:05:32.494085Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T02:05:32.494085Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Survey on Recent Advances in Conversational Data Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.CL","authors_text":"Evangelos Kanoulas, Faegheh Hasibi, Heydar Soudani, Roxana Petcu","submitted_at":"2024-05-12T10:11:12Z","abstract_excerpt":"Recent advancements in conversational systems have significantly enhanced human-machine interactions across various domains. However, training these systems is challenging due to the scarcity of specialized dialogue data. Traditionally, conversational datasets were created through crowdsourcing, but this method has proven costly, limited in scale, and labor-intensive. As a solution, the development of synthetic dialogue data has emerged, utilizing techniques to augment existing datasets or convert textual resources into conversational formats, providing a more efficient and scalable approach t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2405.13003","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2405.13003/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2405.13003","created_at":"2026-05-29T02:05:32.494216+00:00"},{"alias_kind":"arxiv_version","alias_value":"2405.13003v2","created_at":"2026-05-29T02:05:32.494216+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2405.13003","created_at":"2026-05-29T02:05:32.494216+00:00"},{"alias_kind":"pith_short_12","alias_value":"YABOGVDYH2XP","created_at":"2026-05-29T02:05:32.494216+00:00"},{"alias_kind":"pith_short_16","alias_value":"YABOGVDYH2XPTG5O","created_at":"2026-05-29T02:05:32.494216+00:00"},{"alias_kind":"pith_short_8","alias_value":"YABOGVDY","created_at":"2026-05-29T02:05:32.494216+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2501.04410","citing_title":"User Simulation in the Era of Generative AI: User Modeling, Synthetic Data Generation, and System Evaluation","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2508.18167","citing_title":"DiscussLLM: Teaching Large Language Models When to Speak","ref_index":24,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK","json":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK.json","graph_json":"https://pith.science/api/pith-number/YABOGVDYH2XPTG5O6EWRSCESDK/graph.json","events_json":"https://pith.science/api/pith-number/YABOGVDYH2XPTG5O6EWRSCESDK/events.json","paper":"https://pith.science/paper/YABOGVDY"},"agent_actions":{"view_html":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK","download_json":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK.json","view_paper":"https://pith.science/paper/YABOGVDY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2405.13003&json=true","fetch_graph":"https://pith.science/api/pith-number/YABOGVDYH2XPTG5O6EWRSCESDK/graph.json","fetch_events":"https://pith.science/api/pith-number/YABOGVDYH2XPTG5O6EWRSCESDK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK/action/storage_attestation","attest_author":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK/action/author_attestation","sign_citation":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK/action/citation_signature","submit_replication":"https://pith.science/pith/YABOGVDYH2XPTG5O6EWRSCESDK/action/replication_record"}},"created_at":"2026-05-29T02:05:32.494216+00:00","updated_at":"2026-05-29T02:05:32.494216+00:00"}