{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:W75NYE45WBDL6GXN7YZUPGFTD3","merge_version":"pith-open-graph-merge-v1","event_count":6,"valid_event_count":6,"invalid_event_count":0,"equivocation_count":1,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c0214041ba1670db67fa8dd2c3780c1dc6e41cb4310dd625fd438dcb5457286a","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-22T17:45:01Z","title_canon_sha256":"9387b6343e176d442a6f5923dbc23dc55f191ad54de8da8c1ac76fbffc5ba10b"},"schema_version":"1.0","source":{"id":"2605.23883","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23883","created_at":"2026-05-25T02:02:37Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23883v1","created_at":"2026-05-25T02:02:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23883","created_at":"2026-05-25T02:02:37Z"},{"alias_kind":"pith_short_12","alias_value":"W75NYE45WBDL","created_at":"2026-05-25T02:02:37Z"},{"alias_kind":"pith_short_16","alias_value":"W75NYE45WBDL6GXN","created_at":"2026-05-25T02:02:37Z"},{"alias_kind":"pith_short_8","alias_value":"W75NYE45","created_at":"2026-05-25T02:02:37Z"}],"graph_snapshots":[{"event_id":"sha256:22d1d8d9dc59e440c0c9c185c3497ae7a2e216937fb28b46ab7fee3085bb3140","target":"graph","created_at":"2026-05-25T02:02:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.23883/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Despite remarkable progress in Multimodal Large Language Models (MLLMs), these models still struggle with fine-grained understanding tasks. In this work, we propose Procedurally Generated Tasks (PGT), a simple data-driven framework that serves a dual purpose: inducing fine-grained visual understanding and acting as a low-cost diagnostic tool to identify the source of perception failures. By overlaying unambiguous geometric primitives on images, PGT generate additional dense supervision that disentangles visual grounding capability from semantic priors. Extensive experiments on relational, quan","authors_text":"Adriana Romero-Soriano, Amir Bar, Michal Drozdzal, Rim Assouel","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-22T17:45:01Z","title":"PGT: Procedurally Generated Tasks for improving visual grounding in MLLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23883","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5ae8eccf089397c893ae839ebe2126ff7d9125c0b6e67a205621bc63da8dea3e","target":"record","created_at":"2026-05-25T02:02:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c0214041ba1670db67fa8dd2c3780c1dc6e41cb4310dd625fd438dcb5457286a","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-22T17:45:01Z","title_canon_sha256":"9387b6343e176d442a6f5923dbc23dc55f191ad54de8da8c1ac76fbffc5ba10b"},"schema_version":"1.0","source":{"id":"2605.23883","kind":"arxiv","version":1}},"canonical_sha256":"b7fadc139db046bf1aedfe334798b31ed97ecdcc3dde14edfac753afa75b40b4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b7fadc139db046bf1aedfe334798b31ed97ecdcc3dde14edfac753afa75b40b4","first_computed_at":"2026-05-25T02:02:37.470960Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:02:37.470960Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"28fRkyWw/qATsKA4JoxdwTK+gUBJqVCuSLRueOTeXFxHs5tWjh2XzIbnhpyp8dMUQoZ12f7tCm1i4lzm7yVpCw==","signature_status":"signed_v1","signed_at":"2026-05-25T02:02:37.471608Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.23883","source_kind":"arxiv","source_version":1}}},"equivocations":[{"signer_id":"pith.science","event_type":"integrity_finding","target":"integrity","event_ids":["sha256:02e53c4f2d15514e722ddb68ca8675fc1da24c0d4ff3b5c59f1287ad83ce08ac","sha256:09593718ce818ade292edde031c4ab950768f42d8c4ca6c556a6e86b7ac86c87","sha256:bbc0c32ab422987f2736365427fe66f26e6cafdb9cfdea8b6773aea388625530","sha256:d9b5b235de54207e9895a4ac46f263c6be1c8097cfec7ec4e59be2921c8d4a7f"]}],"invalid_events":[],"applied_event_ids":["sha256:5ae8eccf089397c893ae839ebe2126ff7d9125c0b6e67a205621bc63da8dea3e","sha256:22d1d8d9dc59e440c0c9c185c3497ae7a2e216937fb28b46ab7fee3085bb3140"],"state_sha256":"81250693b8f3be0abe7d89c356b18f9a2b083a136d4eca7c0f06e4344f98defc"}