{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:4P7MDSIT6Z65IQUOGKLMAQ7FLM","short_pith_number":"pith:4P7MDSIT","canonical_record":{"source":{"id":"2605.12736","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-12T20:40:22Z","cross_cats_sorted":[],"title_canon_sha256":"679db2f59def5caefe5cf8462f1096504cc977f8d44a03440bb1294497a466e8","abstract_canon_sha256":"54a9d32132385b6400d4ac8826ecedf7ac497fa79384c7224f84a9cb052cb387"},"schema_version":"1.0"},"canonical_sha256":"e3fec1c913f67dd4428e3296c043e55b3715588b3027ac82d80cab444c15cd16","source":{"kind":"arxiv","id":"2605.12736","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12736","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12736v1","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12736","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"4P7MDSIT6Z65","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"4P7MDSIT6Z65IQUO","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"4P7MDSIT","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:4P7MDSIT6Z65IQUOGKLMAQ7FLM","target":"record","payload":{"canonical_record":{"source":{"id":"2605.12736","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-12T20:40:22Z","cross_cats_sorted":[],"title_canon_sha256":"679db2f59def5caefe5cf8462f1096504cc977f8d44a03440bb1294497a466e8","abstract_canon_sha256":"54a9d32132385b6400d4ac8826ecedf7ac497fa79384c7224f84a9cb052cb387"},"schema_version":"1.0"},"canonical_sha256":"e3fec1c913f67dd4428e3296c043e55b3715588b3027ac82d80cab444c15cd16","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:49.156569Z","signature_b64":"87fAFS676rDzUL38bjtbS3iiR/bTn3g2tNHsyzraPmuH/T13Hn9PilNw3SpA63TmK+qp2UHfHHDb7bHWRag4Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e3fec1c913f67dd4428e3296c043e55b3715588b3027ac82d80cab444c15cd16","last_reissued_at":"2026-05-18T03:09:49.155711Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:49.155711Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.12736","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xh3C759HHuQ4UqOqgrzX2hHC3jfXXR1KpnkDsqz76BcTluOm/izpGAT/33C23IDjR0MGM32SqbQFGBQr9bGPAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T03:38:31.524453Z"},"content_sha256":"6586e465127c1a390678393e0d5d1bb85979c996adf0dd5d5c4ee3ed2503171e","schema_version":"1.0","event_id":"sha256:6586e465127c1a390678393e0d5d1bb85979c996adf0dd5d5c4ee3ed2503171e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:4P7MDSIT6Z65IQUOGKLMAQ7FLM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ConRetroBert: EMA Stabilized Dual Encoders for Template-Based Single-Step Retrosynthesis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Dual encoders with EMA stabilization lift template-based retrosynthesis top-1 accuracy from 50.5% to 62.4% on USPTO-50k.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Ali Khodabandeh Yalabadi, Ivan Garibay, Mohammad Jahid Ibna Basher, Ozlem Ozmen Garibay","submitted_at":"2026-05-12T20:40:22Z","abstract_excerpt":"Template based single step retrosynthesis predicts reactants by selecting and applying an explicit reaction template, making each prediction traceable to a chemical transformation rule. This is useful for synthesis planning, but template based methods are often viewed as less competitive than template free models because template prediction is commonly formulated as global classification over a long tailed rule library. We argue that this weakness is not inherent to templates, but to the learning formulation. We present ConRetroBert, a dual encoder framework that reframes template based retros"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"On the local USPTO-50k benchmark, Stage 2 candidate set ranking improves top-1 reaction accuracy from 50.5% to 61.3%, while EMA stabilized template adaptation further improves it to 62.4%. Fine tuning from a leakage controlled USPTO-Full checkpoint reaches 75.4% top-1 accuracy on USPTO-50k.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The contrastive pretraining produces a shared embedding space in which nearest-neighbor retrieval over templates meaningfully corresponds to chemically valid reactant predictions, and that the mined hard-negative sets remain stable under the EMA update without introducing systematic bias.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"ConRetroBert achieves 62.4% top-1 accuracy on USPTO-50k by combining contrastive pretraining, hard-negative listwise ranking, and EMA-stabilized dual encoders for template retrieval in retrosynthesis.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Dual encoders with EMA stabilization lift template-based retrosynthesis top-1 accuracy from 50.5% to 62.4% on USPTO-50k.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"6eda8d2c4b5034256452438208c1e041ba068f220823e77c260ae6c1b60b1130"},"source":{"id":"2605.12736","kind":"arxiv","version":1},"verdict":{"id":"3c683217-027d-4674-9855-cd4d49a84fd9","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T21:24:05.785603Z","strongest_claim":"On the local USPTO-50k benchmark, Stage 2 candidate set ranking improves top-1 reaction accuracy from 50.5% to 61.3%, while EMA stabilized template adaptation further improves it to 62.4%. Fine tuning from a leakage controlled USPTO-Full checkpoint reaches 75.4% top-1 accuracy on USPTO-50k.","one_line_summary":"ConRetroBert achieves 62.4% top-1 accuracy on USPTO-50k by combining contrastive pretraining, hard-negative listwise ranking, and EMA-stabilized dual encoders for template retrieval in retrosynthesis.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The contrastive pretraining produces a shared embedding space in which nearest-neighbor retrieval over templates meaningfully corresponds to chemically valid reactant predictions, and that the mined hard-negative sets remain stable under the EMA update without introducing systematic bias.","pith_extraction_headline":"Dual encoders with EMA stabilization lift template-based retrosynthesis top-1 accuracy from 50.5% to 62.4% on USPTO-50k."},"references":{"count":28,"sample":[{"doi":"","year":2017,"title":"Chemistry--A European Journal , volume=","work_id":"86f42883-c8c8-4a9b-b281-3631d1da315c","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"ACS central science , volume=","work_id":"946dc2c2-5169-4cd4-8e7c-ebfe8adc7840","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Digital Discovery , volume=","work_id":"33fbbd53-4933-40c0-8e31-4f7b49ef8476","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Nature Communications , volume=","work_id":"2a09c309-5b89-42ba-b176-9cc66b2b89c6","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Nature Communications , volume=","work_id":"93df5eb1-7a1a-46a2-b501-3bda5c7ef6db","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":28,"snapshot_sha256":"fe6cfe86f73ccdb0f6a7f507be166d2514239db626cae92d8fc5b25c42ccde5a","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f75678b9e8fa59f4af1d1bb793005d0526f8aa7cac855bf6514705245583e4e8"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"3c683217-027d-4674-9855-cd4d49a84fd9"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pe3F/iwok/e6QTZSL031nUxL00Q7qDBc06zGaJaqjOMSskW8EAJwFe19xI6KQSzpedp5CExGFHk1nnKYlcsiBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T03:38:31.524965Z"},"content_sha256":"2290545d9f54019b40572b7e1b69b389567ad334fe4228eeaee5f8813d47c777","schema_version":"1.0","event_id":"sha256:2290545d9f54019b40572b7e1b69b389567ad334fe4228eeaee5f8813d47c777"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4P7MDSIT6Z65IQUOGKLMAQ7FLM/bundle.json","state_url":"https://pith.science/pith/4P7MDSIT6Z65IQUOGKLMAQ7FLM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4P7MDSIT6Z65IQUOGKLMAQ7FLM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T03:38:31Z","links":{"resolver":"https://pith.science/pith/4P7MDSIT6Z65IQUOGKLMAQ7FLM","bundle":"https://pith.science/pith/4P7MDSIT6Z65IQUOGKLMAQ7FLM/bundle.json","state":"https://pith.science/pith/4P7MDSIT6Z65IQUOGKLMAQ7FLM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4P7MDSIT6Z65IQUOGKLMAQ7FLM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:4P7MDSIT6Z65IQUOGKLMAQ7FLM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"54a9d32132385b6400d4ac8826ecedf7ac497fa79384c7224f84a9cb052cb387","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-12T20:40:22Z","title_canon_sha256":"679db2f59def5caefe5cf8462f1096504cc977f8d44a03440bb1294497a466e8"},"schema_version":"1.0","source":{"id":"2605.12736","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12736","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12736v1","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12736","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"4P7MDSIT6Z65","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"4P7MDSIT6Z65IQUO","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"4P7MDSIT","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:2290545d9f54019b40572b7e1b69b389567ad334fe4228eeaee5f8813d47c777","target":"graph","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"On the local USPTO-50k benchmark, Stage 2 candidate set ranking improves top-1 reaction accuracy from 50.5% to 61.3%, while EMA stabilized template adaptation further improves it to 62.4%. Fine tuning from a leakage controlled USPTO-Full checkpoint reaches 75.4% top-1 accuracy on USPTO-50k."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The contrastive pretraining produces a shared embedding space in which nearest-neighbor retrieval over templates meaningfully corresponds to chemically valid reactant predictions, and that the mined hard-negative sets remain stable under the EMA update without introducing systematic bias."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ConRetroBert achieves 62.4% top-1 accuracy on USPTO-50k by combining contrastive pretraining, hard-negative listwise ranking, and EMA-stabilized dual encoders for template retrieval in retrosynthesis."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Dual encoders with EMA stabilization lift template-based retrosynthesis top-1 accuracy from 50.5% to 62.4% on USPTO-50k."}],"snapshot_sha256":"6eda8d2c4b5034256452438208c1e041ba068f220823e77c260ae6c1b60b1130"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f75678b9e8fa59f4af1d1bb793005d0526f8aa7cac855bf6514705245583e4e8"},"paper":{"abstract_excerpt":"Template based single step retrosynthesis predicts reactants by selecting and applying an explicit reaction template, making each prediction traceable to a chemical transformation rule. This is useful for synthesis planning, but template based methods are often viewed as less competitive than template free models because template prediction is commonly formulated as global classification over a long tailed rule library. We argue that this weakness is not inherent to templates, but to the learning formulation. We present ConRetroBert, a dual encoder framework that reframes template based retros","authors_text":"Ali Khodabandeh Yalabadi, Ivan Garibay, Mohammad Jahid Ibna Basher, Ozlem Ozmen Garibay","cross_cats":[],"headline":"Dual encoders with EMA stabilization lift template-based retrosynthesis top-1 accuracy from 50.5% to 62.4% on USPTO-50k.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-12T20:40:22Z","title":"ConRetroBert: EMA Stabilized Dual Encoders for Template-Based Single-Step Retrosynthesis"},"references":{"count":28,"internal_anchors":0,"resolved_work":28,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Chemistry--A European Journal , volume=","work_id":"86f42883-c8c8-4a9b-b281-3631d1da315c","year":2017},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"ACS central science , volume=","work_id":"946dc2c2-5169-4cd4-8e7c-ebfe8adc7840","year":2017},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Digital Discovery , volume=","work_id":"33fbbd53-4933-40c0-8e31-4f7b49ef8476","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Nature Communications , volume=","work_id":"2a09c309-5b89-42ba-b176-9cc66b2b89c6","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Nature Communications , volume=","work_id":"93df5eb1-7a1a-46a2-b501-3bda5c7ef6db","year":2025}],"snapshot_sha256":"fe6cfe86f73ccdb0f6a7f507be166d2514239db626cae92d8fc5b25c42ccde5a"},"source":{"id":"2605.12736","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T21:24:05.785603Z","id":"3c683217-027d-4674-9855-cd4d49a84fd9","model_set":{"reader":"grok-4.3"},"one_line_summary":"ConRetroBert achieves 62.4% top-1 accuracy on USPTO-50k by combining contrastive pretraining, hard-negative listwise ranking, and EMA-stabilized dual encoders for template retrieval in retrosynthesis.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Dual encoders with EMA stabilization lift template-based retrosynthesis top-1 accuracy from 50.5% to 62.4% on USPTO-50k.","strongest_claim":"On the local USPTO-50k benchmark, Stage 2 candidate set ranking improves top-1 reaction accuracy from 50.5% to 61.3%, while EMA stabilized template adaptation further improves it to 62.4%. Fine tuning from a leakage controlled USPTO-Full checkpoint reaches 75.4% top-1 accuracy on USPTO-50k.","weakest_assumption":"The contrastive pretraining produces a shared embedding space in which nearest-neighbor retrieval over templates meaningfully corresponds to chemically valid reactant predictions, and that the mined hard-negative sets remain stable under the EMA update without introducing systematic bias."}},"verdict_id":"3c683217-027d-4674-9855-cd4d49a84fd9"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6586e465127c1a390678393e0d5d1bb85979c996adf0dd5d5c4ee3ed2503171e","target":"record","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"54a9d32132385b6400d4ac8826ecedf7ac497fa79384c7224f84a9cb052cb387","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-12T20:40:22Z","title_canon_sha256":"679db2f59def5caefe5cf8462f1096504cc977f8d44a03440bb1294497a466e8"},"schema_version":"1.0","source":{"id":"2605.12736","kind":"arxiv","version":1}},"canonical_sha256":"e3fec1c913f67dd4428e3296c043e55b3715588b3027ac82d80cab444c15cd16","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e3fec1c913f67dd4428e3296c043e55b3715588b3027ac82d80cab444c15cd16","first_computed_at":"2026-05-18T03:09:49.155711Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:49.155711Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"87fAFS676rDzUL38bjtbS3iiR/bTn3g2tNHsyzraPmuH/T13Hn9PilNw3SpA63TmK+qp2UHfHHDb7bHWRag4Bw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:49.156569Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12736","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6586e465127c1a390678393e0d5d1bb85979c996adf0dd5d5c4ee3ed2503171e","sha256:2290545d9f54019b40572b7e1b69b389567ad334fe4228eeaee5f8813d47c777"],"state_sha256":"c489a1cb6e58c1127f872e5978fe7b6f8b790a366d25e92e4460814bd41f3752"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jmSOwXbZOq7FnYqbnwwpgXy05wK/Q12dJqCy9ofUUo7ogheBQlpjZdAF3kJOKczAu7CdS+JlnB2lGcI/Pxi9Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T03:38:31.527349Z","bundle_sha256":"98dd2e80a40f34d1f2a27a690a3dbd81867ec94ce18ff4c969d0e65b50079859"}}