{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:GAHNLIGVIFTDCMYQADPA2DH3JB","short_pith_number":"pith:GAHNLIGV","schema_version":"1.0","canonical_sha256":"300ed5a0d5416631331000de0d0cfb485bb9405e682b1017d15d23ec3ecbae17","source":{"kind":"arxiv","id":"2604.18572","version":2},"attestation_state":"computed","paper":{"title":"Back into Plato's Cave: Examining Cross-modal Representational Convergence at Scale","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Evidence for cross-modal neural network convergence weakens at large scales and realistic conditions","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CV","authors_text":"Alexei A. Efros, A. Sophia Koepke, Daniil Zverev, Shiry Ginosar","submitted_at":"2026-04-20T17:56:02Z","abstract_excerpt":"The Platonic Representation Hypothesis suggests that neural networks trained on different modalities (e.g., text and images) align and eventually converge toward the same representation of reality. If true, this has significant implications for whether modality choice matters at all. We show that the experimental evidence for this hypothesis is fragile and depends critically on the evaluation regime. Alignment is measured using mutual nearest neighbors on small datasets ($\\approx$1K samples) and degrades substantially as the dataset is scaled to millions of samples. The same behavior is observ"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2604.18572","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-20T17:56:02Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"3ebd6dc6108b88b305009f20a03871a9b8c17084fbd8c3c227c1791bedcb2407","abstract_canon_sha256":"49517b19e26333b740c725f548c87bb36956011c9e171df64a3fdffde2c2c4d4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T02:05:47.682991Z","signature_b64":"ouecQ3K6f3L4ijdqIqJEjecj1PwlbLURRlt+Xmnc/W7UZjlkKj8rWSGVIophn5XeBJmDslKOOsj+Dyj0cqIMBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"300ed5a0d5416631331000de0d0cfb485bb9405e682b1017d15d23ec3ecbae17","last_reissued_at":"2026-06-03T02:05:47.682345Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T02:05:47.682345Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Back into Plato's Cave: Examining Cross-modal Representational Convergence at Scale","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Evidence for cross-modal neural network convergence weakens at large scales and realistic conditions","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CV","authors_text":"Alexei A. Efros, A. Sophia Koepke, Daniil Zverev, Shiry Ginosar","submitted_at":"2026-04-20T17:56:02Z","abstract_excerpt":"The Platonic Representation Hypothesis suggests that neural networks trained on different modalities (e.g., text and images) align and eventually converge toward the same representation of reality. If true, this has significant implications for whether modality choice matters at all. We show that the experimental evidence for this hypothesis is fragile and depends critically on the evaluation regime. Alignment is measured using mutual nearest neighbors on small datasets ($\\approx$1K samples) and degrades substantially as the dataset is scaled to millions of samples. The same behavior is observ"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We show that the experimental evidence for this hypothesis is fragile and depends critically on the evaluation regime. Alignment ... degrades substantially as the dataset is scaled to millions of samples.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That mutual nearest-neighbor overlap measured on large-scale, many-to-many image-text pairs is a faithful indicator of whether fine-grained representational structure has converged.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Evidence for cross-modal representational convergence weakens substantially at scale and in realistic many-to-many settings, indicating models learn rich but distinct representations.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Evidence for cross-modal neural network convergence weakens at large scales and realistic conditions","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d59c8e129ee91a439fb9ccab767a1aec5b1388fdf333a48aab62f3d7d3bf9239"},"source":{"id":"2604.18572","kind":"arxiv","version":2},"verdict":{"id":"3c3fbf6f-74e9-4a5d-be04-e3b109778a29","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T04:56:24.545900Z","strongest_claim":"We show that the experimental evidence for this hypothesis is fragile and depends critically on the evaluation regime. Alignment ... degrades substantially as the dataset is scaled to millions of samples.","one_line_summary":"Evidence for cross-modal representational convergence weakens substantially at scale and in realistic many-to-many settings, indicating models learn rich but distinct representations.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That mutual nearest-neighbor overlap measured on large-scale, many-to-many image-text pairs is a faithful indicator of whether fine-grained representational structure has converged.","pith_extraction_headline":"Evidence for cross-modal neural network convergence weakens at large scales and realistic conditions"},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.18572/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-20T03:52:35.846818Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"ab0fbcac7dbcbdcdfda18c29a191bf103ba53fdf61a7d4f1e2860ff71a5aecd3"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.18572","created_at":"2026-06-03T02:05:47.682464+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.18572v2","created_at":"2026-06-03T02:05:47.682464+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.18572","created_at":"2026-06-03T02:05:47.682464+00:00"},{"alias_kind":"pith_short_12","alias_value":"GAHNLIGVIFTD","created_at":"2026-06-03T02:05:47.682464+00:00"},{"alias_kind":"pith_short_16","alias_value":"GAHNLIGVIFTDCMYQ","created_at":"2026-06-03T02:05:47.682464+00:00"},{"alias_kind":"pith_short_8","alias_value":"GAHNLIGV","created_at":"2026-06-03T02:05:47.682464+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB","json":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB.json","graph_json":"https://pith.science/api/pith-number/GAHNLIGVIFTDCMYQADPA2DH3JB/graph.json","events_json":"https://pith.science/api/pith-number/GAHNLIGVIFTDCMYQADPA2DH3JB/events.json","paper":"https://pith.science/paper/GAHNLIGV"},"agent_actions":{"view_html":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB","download_json":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB.json","view_paper":"https://pith.science/paper/GAHNLIGV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.18572&json=true","fetch_graph":"https://pith.science/api/pith-number/GAHNLIGVIFTDCMYQADPA2DH3JB/graph.json","fetch_events":"https://pith.science/api/pith-number/GAHNLIGVIFTDCMYQADPA2DH3JB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB/action/storage_attestation","attest_author":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB/action/author_attestation","sign_citation":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB/action/citation_signature","submit_replication":"https://pith.science/pith/GAHNLIGVIFTDCMYQADPA2DH3JB/action/replication_record"}},"created_at":"2026-06-03T02:05:47.682464+00:00","updated_at":"2026-06-03T02:05:47.682464+00:00"}