{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CANKSGJCMJ27YILM7CFQTPD7ZZ","short_pith_number":"pith:CANKSGJC","schema_version":"1.0","canonical_sha256":"101aa919226275fc216cf88b09bc7fce7ad8b0e56851ac426399907b880828ce","source":{"kind":"arxiv","id":"2606.13105","version":1},"attestation_state":"computed","paper":{"title":"Disparate Impact in Synthetic Data Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Batiste Le Bars, Marc Tommasi, Micha\\\"el Perrot, Paul Andrey","submitted_at":"2026-06-11T09:33:12Z","abstract_excerpt":"We revisit the fairness notion of disparate impact for synthetic data generation (SDG), that assesses whether the utility of generated records is the same across sensitive groups. Our approach departs from existing work on fair SDG, that address the problem of correcting for undue biases in the observed distribution, hence redefining SDG as learning a distribution that is not that of the real data. By contrast, non-disparate impact is notably achieved when the synthetic and real distributions are the same. We expose reasons why SDG may fail to reach that solution and discuss why approximation "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.13105","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-11T09:33:12Z","cross_cats_sorted":[],"title_canon_sha256":"19382d0825187656bec29ee2adb9102d1291e44aa4ae6c5ea5ec553479158ed4","abstract_canon_sha256":"472163f854a8e1d1a37451aab9b5d38484530a1d031ae035a0eec53ef13f392c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:09:40.381372Z","signature_b64":"bdtRvWrOGmC4frMKntPiGeM225MiJKbKRzt9sNxxghd4GQabMNlH5yIhkc/apYaHCbLa4sG6Uafg6p65lRzPDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"101aa919226275fc216cf88b09bc7fce7ad8b0e56851ac426399907b880828ce","last_reissued_at":"2026-06-12T01:09:40.380691Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:09:40.380691Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Disparate Impact in Synthetic Data Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Batiste Le Bars, Marc Tommasi, Micha\\\"el Perrot, Paul Andrey","submitted_at":"2026-06-11T09:33:12Z","abstract_excerpt":"We revisit the fairness notion of disparate impact for synthetic data generation (SDG), that assesses whether the utility of generated records is the same across sensitive groups. Our approach departs from existing work on fair SDG, that address the problem of correcting for undue biases in the observed distribution, hence redefining SDG as learning a distribution that is not that of the real data. By contrast, non-disparate impact is notably achieved when the synthetic and real distributions are the same. We expose reasons why SDG may fail to reach that solution and discuss why approximation "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.13105","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.13105/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.13105","created_at":"2026-06-12T01:09:40.380800+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.13105v1","created_at":"2026-06-12T01:09:40.380800+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.13105","created_at":"2026-06-12T01:09:40.380800+00:00"},{"alias_kind":"pith_short_12","alias_value":"CANKSGJCMJ27","created_at":"2026-06-12T01:09:40.380800+00:00"},{"alias_kind":"pith_short_16","alias_value":"CANKSGJCMJ27YILM","created_at":"2026-06-12T01:09:40.380800+00:00"},{"alias_kind":"pith_short_8","alias_value":"CANKSGJC","created_at":"2026-06-12T01:09:40.380800+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ","json":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ.json","graph_json":"https://pith.science/api/pith-number/CANKSGJCMJ27YILM7CFQTPD7ZZ/graph.json","events_json":"https://pith.science/api/pith-number/CANKSGJCMJ27YILM7CFQTPD7ZZ/events.json","paper":"https://pith.science/paper/CANKSGJC"},"agent_actions":{"view_html":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ","download_json":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ.json","view_paper":"https://pith.science/paper/CANKSGJC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.13105&json=true","fetch_graph":"https://pith.science/api/pith-number/CANKSGJCMJ27YILM7CFQTPD7ZZ/graph.json","fetch_events":"https://pith.science/api/pith-number/CANKSGJCMJ27YILM7CFQTPD7ZZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ/action/storage_attestation","attest_author":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ/action/author_attestation","sign_citation":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ/action/citation_signature","submit_replication":"https://pith.science/pith/CANKSGJCMJ27YILM7CFQTPD7ZZ/action/replication_record"}},"created_at":"2026-06-12T01:09:40.380800+00:00","updated_at":"2026-06-12T01:09:40.380800+00:00"}