{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:3CASNF7HBXQPV7LIWGYERCDGUJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e794d9b6eede041f0aede2b0241765e99af14b3d76bd7a507d3d1ace5aab400e","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-12-02T06:30:10Z","title_canon_sha256":"3cab414ddfd8e2fcf302420c005102fd97e23d88b1eb1da8b9368c8b74c997fb"},"schema_version":"1.0","source":{"id":"2512.02456","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.02456","created_at":"2026-06-30T02:17:12Z"},{"alias_kind":"arxiv_version","alias_value":"2512.02456v2","created_at":"2026-06-30T02:17:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.02456","created_at":"2026-06-30T02:17:12Z"},{"alias_kind":"pith_short_12","alias_value":"3CASNF7HBXQP","created_at":"2026-06-30T02:17:12Z"},{"alias_kind":"pith_short_16","alias_value":"3CASNF7HBXQPV7LI","created_at":"2026-06-30T02:17:12Z"},{"alias_kind":"pith_short_8","alias_value":"3CASNF7H","created_at":"2026-06-30T02:17:12Z"}],"graph_snapshots":[{"event_id":"sha256:a825d9eadc88e1a8bcfa395e9e4abea37f2aea3684da6043106b4f72c5786e64","target":"graph","created_at":"2026-06-30T02:17:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.02456/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Vision-Language Models (VLMs) have achieved remarkable progress in integrating visual perception with language understanding. However, effective multimodal reasoning requires both accurate perception and robust reasoning, and weakness in either limits the performance of VLMs. Prior efforts to enhance reasoning often depend on high-quality chain-of-thought (CoT) data, obtained via labor-intensive human annotations, costly proprietary models, or self-training methods that overlook perception. To address these limitations, we propose a simple yet effective self-training framework called See-Think","authors_text":"Sadbhawna, Sonam Gupta, Sourabh Sharma","cross_cats":["cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-12-02T06:30:10Z","title":"See, Think, Learn: A Self-Taught Multimodal Reasoner"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.02456","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6404bc963557b0c7bd145dbaa2b34a92687f2a420f591fbfa18e8ed9d37826b1","target":"record","created_at":"2026-06-30T02:17:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e794d9b6eede041f0aede2b0241765e99af14b3d76bd7a507d3d1ace5aab400e","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-12-02T06:30:10Z","title_canon_sha256":"3cab414ddfd8e2fcf302420c005102fd97e23d88b1eb1da8b9368c8b74c997fb"},"schema_version":"1.0","source":{"id":"2512.02456","kind":"arxiv","version":2}},"canonical_sha256":"d8812697e70de0fafd68b1b0488866a24405705b4a4e58f8e6305b39006ab089","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d8812697e70de0fafd68b1b0488866a24405705b4a4e58f8e6305b39006ab089","first_computed_at":"2026-06-30T02:17:12.371162Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-30T02:17:12.371162Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yTtS1Gy/tnhkWdWFLa0/WTgP7oOxJvwEnoV3RDWYoW3yL3Hqwr8YH/LqRAu42oobOSRUOM9CSNK+Pjn54egaDA==","signature_status":"signed_v1","signed_at":"2026-06-30T02:17:12.372043Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.02456","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6404bc963557b0c7bd145dbaa2b34a92687f2a420f591fbfa18e8ed9d37826b1","sha256:a825d9eadc88e1a8bcfa395e9e4abea37f2aea3684da6043106b4f72c5786e64"],"state_sha256":"0aa02bf8ed68e5aee6db13fe099ef847d847dd727cc66ba69fdf976cb5f7503e"}