{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:RYWEI2CX4PSQFA6TAYNZVDUFOS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f2b2aae46f9790808bafd9ca6cb6cda15134e99985b5f57ff8c8358e3b3319a9","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-02T04:45:02Z","title_canon_sha256":"f3f42ade27a1391030569a91ba1cbd66617f45a19466490b25a8c639eb5e7a58"},"schema_version":"1.0","source":{"id":"2606.03148","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.03148","created_at":"2026-06-03T01:05:33Z"},{"alias_kind":"arxiv_version","alias_value":"2606.03148v1","created_at":"2026-06-03T01:05:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03148","created_at":"2026-06-03T01:05:33Z"},{"alias_kind":"pith_short_12","alias_value":"RYWEI2CX4PSQ","created_at":"2026-06-03T01:05:33Z"},{"alias_kind":"pith_short_16","alias_value":"RYWEI2CX4PSQFA6T","created_at":"2026-06-03T01:05:33Z"},{"alias_kind":"pith_short_8","alias_value":"RYWEI2CX","created_at":"2026-06-03T01:05:33Z"}],"graph_snapshots":[{"event_id":"sha256:0079a7b7d239a20c7530acf82a54be9a231433969c15d22a3972641e618d77b2","target":"graph","created_at":"2026-06-03T01:05:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.03148/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Robust visual classification often depends on localizing the main foreground objects in an image while ignoring contextual distractors. Surprisingly, we find that the attention maps of smaller self-supervised ViTs localize foreground objects better than those of larger ViTs. However, we still need large ViTs, because they extract richer representations from each patch. To get the best of both worlds, good localization and rich representations, we propose $A^2$, a simple method that leverages this inverse scaling finding by decoupling where to look (a small attention model) from what to extract","authors_text":"Carl Vondrick, Huy Ha, Sreehari Rammohan","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-02T04:45:02Z","title":"$A^2$: Smaller Self-Supervised ViTs Localize Better than Larger Ones"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03148","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dab2ed47a3139ff0b32ef26b1400f6028dee993533189f5a7a81ce8559981b97","target":"record","created_at":"2026-06-03T01:05:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f2b2aae46f9790808bafd9ca6cb6cda15134e99985b5f57ff8c8358e3b3319a9","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-02T04:45:02Z","title_canon_sha256":"f3f42ade27a1391030569a91ba1cbd66617f45a19466490b25a8c639eb5e7a58"},"schema_version":"1.0","source":{"id":"2606.03148","kind":"arxiv","version":1}},"canonical_sha256":"8e2c446857e3e50283d3061b9a8e85749dc5324aa48b6398fa5f97889fb6a12a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8e2c446857e3e50283d3061b9a8e85749dc5324aa48b6398fa5f97889fb6a12a","first_computed_at":"2026-06-03T01:05:33.234725Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T01:05:33.234725Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3zIoIxHRhqbBUpbNsuzCv4o+zCerPlpJd6OFBtY3fMNi7ZpF8wFln2FvhI6dSiyN4BXw1oQnZI3XuCX8wb3FDg==","signature_status":"signed_v1","signed_at":"2026-06-03T01:05:33.235122Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.03148","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dab2ed47a3139ff0b32ef26b1400f6028dee993533189f5a7a81ce8559981b97","sha256:0079a7b7d239a20c7530acf82a54be9a231433969c15d22a3972641e618d77b2"],"state_sha256":"3db189962f9e00a0f265cee5c9578d07ec08cdd7f82965527296ee4d63ecc3fc"}