{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:IFKSVAW5QK5GSD7357PE6DIKXV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"225de7e080e752d4cca2963a2295615bc8d1746e79a70f9e3c52c505fe46e6ed","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-04-29T22:46:28Z","title_canon_sha256":"810a5715ff28a9e5116ae16881835c4ce8b38c8a034e01bc970002cabb1c549d"},"schema_version":"1.0","source":{"id":"2004.14507","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2004.14507","created_at":"2026-07-05T01:41:30Z"},{"alias_kind":"arxiv_version","alias_value":"2004.14507v2","created_at":"2026-07-05T01:41:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2004.14507","created_at":"2026-07-05T01:41:30Z"},{"alias_kind":"pith_short_12","alias_value":"IFKSVAW5QK5G","created_at":"2026-07-05T01:41:30Z"},{"alias_kind":"pith_short_16","alias_value":"IFKSVAW5QK5GSD73","created_at":"2026-07-05T01:41:30Z"},{"alias_kind":"pith_short_8","alias_value":"IFKSVAW5","created_at":"2026-07-05T01:41:30Z"}],"graph_snapshots":[{"event_id":"sha256:ecbc557f09b37912cc6900f5e2c8302364fbac9d827edcea6956673ff896419e","target":"graph","created_at":"2026-07-05T01:41:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2004.14507/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Open-domain dialogue generation suffers from the data insufficiency problem due to the vast size of potential responses. In this paper, we propose to explore potential responses by counterfactual reasoning. Given an observed response, the counterfactual reasoning model automatically infers the outcome of an alternative policy that could have been taken. The resulting counterfactual response synthesized in hindsight is of higher quality than the response synthesized from scratch. Training on the counterfactual responses under the adversarial learning framework helps to explore the high-reward a","authors_text":"Qingfu Zhu, Ting Liu, Weinan Zhang, William Yang Wang","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-04-29T22:46:28Z","title":"Counterfactual Off-Policy Training for Neural Response Generation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2004.14507","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:01b09e8b91cc48b887895354d23869388a5e80c0045baadc863bfff576fb127a","target":"record","created_at":"2026-07-05T01:41:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"225de7e080e752d4cca2963a2295615bc8d1746e79a70f9e3c52c505fe46e6ed","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-04-29T22:46:28Z","title_canon_sha256":"810a5715ff28a9e5116ae16881835c4ce8b38c8a034e01bc970002cabb1c549d"},"schema_version":"1.0","source":{"id":"2004.14507","kind":"arxiv","version":2}},"canonical_sha256":"41552a82dd82ba690ffbefde4f0d0abd555c785b6a309a0a73b5778797455cdb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"41552a82dd82ba690ffbefde4f0d0abd555c785b6a309a0a73b5778797455cdb","first_computed_at":"2026-07-05T01:41:30.934931Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T01:41:30.934931Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Cn23uaKE7eUgc/SoYzdZXxsSy3jinxGmp6E7ZLeO5dzjXeiPv74fjFravcbjEU616ACDKw7rgGDXoTz+LieFDA==","signature_status":"signed_v1","signed_at":"2026-07-05T01:41:30.935273Z","signed_message":"canonical_sha256_bytes"},"source_id":"2004.14507","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:01b09e8b91cc48b887895354d23869388a5e80c0045baadc863bfff576fb127a","sha256:ecbc557f09b37912cc6900f5e2c8302364fbac9d827edcea6956673ff896419e"],"state_sha256":"26c1369bb3bb736fcc02df5c201f1eecfe1f97c95f5abe6e05969d2f0a2fd987"}