{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:XYS6WLT2PVXMAUU74ZMDNVKXGC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"921a446b47f1dc21d4fbdb0c3baa76fb32522467c14529a687d6bf5fa7383650","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-27T17:03:45Z","title_canon_sha256":"1fe89d640f3a77efdba81cc09dbf8bf547a3336c18906b7f0ab8b6edb7c1eaab"},"schema_version":"1.0","source":{"id":"2602.24181","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.24181","created_at":"2026-06-09T01:04:41Z"},{"alias_kind":"arxiv_version","alias_value":"2602.24181v2","created_at":"2026-06-09T01:04:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.24181","created_at":"2026-06-09T01:04:41Z"},{"alias_kind":"pith_short_12","alias_value":"XYS6WLT2PVXM","created_at":"2026-06-09T01:04:41Z"},{"alias_kind":"pith_short_16","alias_value":"XYS6WLT2PVXMAUU7","created_at":"2026-06-09T01:04:41Z"},{"alias_kind":"pith_short_8","alias_value":"XYS6WLT2","created_at":"2026-06-09T01:04:41Z"}],"graph_snapshots":[{"event_id":"sha256:78ca5affe8bab5dc4ddfc6762d34dbd2d23f477c3b51f681e6cd65946b9684fa","target":"graph","created_at":"2026-06-09T01:04:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.24181/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Pre-trained vision encoders like DINOv2 have demonstrated exceptional performance on unimodal tasks. However, we observe that their features are poorly aligned across different visual modalities. For instance, the feature embedding for an RGB image and its corresponding depth map of the same scene exhibit a cosine similarity that is nearly identical to that of two random, unrelated images. To address this, we propose the Omnivorous Vision Encoder, a post-training framework that learns a modality-agnostic feature space. We fine-tune the encoder with a dual objective: first, to maximize the feat","authors_text":"Andre Araujo, Drew A. Hudson, Joao Carreira, Maks Ovsjanikov, Niloy J. Mitra, Rishabh Kabra, Skanda Koppula, Ye Xia","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-27T17:03:45Z","title":"A Mixed Diet Makes DINO An Omnivorous Vision Encoder"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.24181","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e026703be72beefd9d2d0c0951847f6128fd409ff544e2122d0cdc6b0b36c42d","target":"record","created_at":"2026-06-09T01:04:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"921a446b47f1dc21d4fbdb0c3baa76fb32522467c14529a687d6bf5fa7383650","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-27T17:03:45Z","title_canon_sha256":"1fe89d640f3a77efdba81cc09dbf8bf547a3336c18906b7f0ab8b6edb7c1eaab"},"schema_version":"1.0","source":{"id":"2602.24181","kind":"arxiv","version":2}},"canonical_sha256":"be25eb2e7a7d6ec0529fe65836d557309e51b7d44b4c8d88f773b1fc9c040de7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"be25eb2e7a7d6ec0529fe65836d557309e51b7d44b4c8d88f773b1fc9c040de7","first_computed_at":"2026-06-09T01:04:41.775617Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T01:04:41.775617Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5ITdpsyp0DAuJlWOiwcqKB9Nn5Ndvt90pYwRllQbejsof+O8pJbvZxs5WqTi6My6RpSW5gDLByrcdciAUQuDBA==","signature_status":"signed_v1","signed_at":"2026-06-09T01:04:41.776139Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.24181","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e026703be72beefd9d2d0c0951847f6128fd409ff544e2122d0cdc6b0b36c42d","sha256:78ca5affe8bab5dc4ddfc6762d34dbd2d23f477c3b51f681e6cd65946b9684fa"],"state_sha256":"77d73dec9353a4c9e9a53d8cedfde697aac39d3b94a48929924265d05841e2e7"}