{"schema":"https://pith.science/schemas/pith-integrity/v1.json","pith_number":"2605.11396","arxiv_id":"2605.11396","integrity":{"available":true,"endpoint":"/pith/2605.11396/integrity.json","summary":{"critical":0,"advisory":1,"informational":0,"by_detector":{"doi_compliance":{"total":1,"critical":0,"advisory":1,"informational":0}}},"clean":false,"detectors_run":[{"name":"claim_evidence","version":"1.0.0","status":"completed","ran_at":"2026-05-20T04:22:00.508716Z","findings_count":0},{"name":"ai_meta_artifact","version":"1.0.0","status":"completed","ran_at":"2026-05-19T12:36:42.035555Z","findings_count":0},{"name":"doi_title_agreement","version":"1.0.0","status":"completed","ran_at":"2026-05-19T10:01:16.677594Z","findings_count":0},{"name":"doi_compliance","version":"1.0.0","status":"completed","ran_at":"2026-05-19T08:30:02.647861Z","findings_count":1}],"findings":[{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/D18-1260.Guilherme) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/D18-1260.Guilherme","detected_arxiv_id":null,"ref_index":11,"audited_at":"2026-05-19T08:30:02.647861Z"}],"snapshot_sha256":"d864e7b4f080e179e48dc197722bc6247b35d3313b8031e2b1eaedc7b21d3ca7"},"events":[{"event_id":483,"event_type":"pith.integrity.v1","payload_sha256":"88e314143490a6e2c6669cfe8f3042f78cb6dc593cff79e3b3c660a80696a7d2","signature_b64":"eqKAfZ3xRMgFfI4o2pgQ7BrybcYTBKO9NE2MRejhVAKzJysI4Uo0bRIPrAB+yzZEf2KpvHHp8yPPflX6pHvJDA==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T08:31:56.406013+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/D18-1260.Guilherme) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Association for Computational Linguistics. doi: 10.18653/v1/ D18-1260. Guilherme Penedo, Hynek Kydl´ıˇcek, Loubna Ben Allal, Anton Lozhkov, Margaret Mitchell, Colin Raffel, Leandro von Werra, and Thomas Wolf. The FineWeb datasets: Decanting","arxiv_id":"2605.11396","detector":"doi_compliance","evidence":{"ref_index":11,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Association for Computational Linguistics. doi: 10.18653/v1/ D18-1260. Guilherme Penedo, Hynek Kydl´ıˇcek, Loubna Ben Allal, Anton Lozhkov, Margaret Mitchell, Colin Raffel, Leandro von Werra, and Thomas Wolf. The FineWeb datasets: Decanting","reconstructed_doi":"10.18653/v1/D18-1260.Guilherme"},"severity":"advisory","ref_index":11,"audited_at":"2026-05-19T08:30:02.647861Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/D18-1260.Guilherme","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"91259d2237c3bfce53efb1f3673251bd3b1c4aa3c1da04b1a3b439e7ffd4af5b","paper_version":1,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}}],"endpoint_self":"/pith/2605.11396/integrity.json","protocol_url":"https://pith.science/pith-integrity-protocol"}