{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:JEXJR53KZQYLVIEVFZD23ZZN4I","short_pith_number":"pith:JEXJR53K","schema_version":"1.0","canonical_sha256":"492e98f76acc30baa0952e47ade72de21c81f631f095c724c7d49ebfd851d783","source":{"kind":"arxiv","id":"2601.10710","version":2},"attestation_state":"computed","paper":{"title":"From One-to-One to Many-to-Many: Dynamic Cross-Layer Injection for Deep Vision-Language Fusion","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Cheng Chen, Hang Yu, Jingkuan Song, Lianli Gao, Peng Di, Pengpeng Zeng, Yuyu Guo","submitted_at":"2026-01-15T18:59:10Z","abstract_excerpt":"Vision-Language Models (VLMs) create a severe visual feature bottleneck by using a crude, asymmetric connection that links only the output of the vision encoder to the input of the large language model (LLM). This static architecture fundamentally limits the ability of LLMs to achieve comprehensive alignment with hierarchical visual knowledge, compromising their capacity to accurately integrate local details with global semantics into coherent reasoning. To resolve this, we introduce Cross-Layer Injection (CLI), a novel and lightweight framework that forges a dynamic many-to-many bridge betwee"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2601.10710","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-01-15T18:59:10Z","cross_cats_sorted":[],"title_canon_sha256":"50e03b311544aee545b575280dd2803aaa64d01651c21602fb0669cf88804083","abstract_canon_sha256":"44e56705210841fce21723effefc97f26e5dd5c143e9bb4fd8e863624b15376c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T01:17:16.420941Z","signature_b64":"qVg2q37kTm+QJc5M8AerI7b+eZe4MB8hgV3cKIBXvGqaCpBCtC+aRE4rdV2GZWmMbdpaNdWTeElFV4Nx1oSSDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"492e98f76acc30baa0952e47ade72de21c81f631f095c724c7d49ebfd851d783","last_reissued_at":"2026-07-03T01:17:16.420474Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T01:17:16.420474Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"From One-to-One to Many-to-Many: Dynamic Cross-Layer Injection for Deep Vision-Language Fusion","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Cheng Chen, Hang Yu, Jingkuan Song, Lianli Gao, Peng Di, Pengpeng Zeng, Yuyu Guo","submitted_at":"2026-01-15T18:59:10Z","abstract_excerpt":"Vision-Language Models (VLMs) create a severe visual feature bottleneck by using a crude, asymmetric connection that links only the output of the vision encoder to the input of the large language model (LLM). This static architecture fundamentally limits the ability of LLMs to achieve comprehensive alignment with hierarchical visual knowledge, compromising their capacity to accurately integrate local details with global semantics into coherent reasoning. To resolve this, we introduce Cross-Layer Injection (CLI), a novel and lightweight framework that forges a dynamic many-to-many bridge betwee"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.10710","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.10710/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2601.10710","created_at":"2026-07-03T01:17:16.420532+00:00"},{"alias_kind":"arxiv_version","alias_value":"2601.10710v2","created_at":"2026-07-03T01:17:16.420532+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.10710","created_at":"2026-07-03T01:17:16.420532+00:00"},{"alias_kind":"pith_short_12","alias_value":"JEXJR53KZQYL","created_at":"2026-07-03T01:17:16.420532+00:00"},{"alias_kind":"pith_short_16","alias_value":"JEXJR53KZQYLVIEV","created_at":"2026-07-03T01:17:16.420532+00:00"},{"alias_kind":"pith_short_8","alias_value":"JEXJR53K","created_at":"2026-07-03T01:17:16.420532+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I","json":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I.json","graph_json":"https://pith.science/api/pith-number/JEXJR53KZQYLVIEVFZD23ZZN4I/graph.json","events_json":"https://pith.science/api/pith-number/JEXJR53KZQYLVIEVFZD23ZZN4I/events.json","paper":"https://pith.science/paper/JEXJR53K"},"agent_actions":{"view_html":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I","download_json":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I.json","view_paper":"https://pith.science/paper/JEXJR53K","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2601.10710&json=true","fetch_graph":"https://pith.science/api/pith-number/JEXJR53KZQYLVIEVFZD23ZZN4I/graph.json","fetch_events":"https://pith.science/api/pith-number/JEXJR53KZQYLVIEVFZD23ZZN4I/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I/action/timestamp_anchor","attest_storage":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I/action/storage_attestation","attest_author":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I/action/author_attestation","sign_citation":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I/action/citation_signature","submit_replication":"https://pith.science/pith/JEXJR53KZQYLVIEVFZD23ZZN4I/action/replication_record"}},"created_at":"2026-07-03T01:17:16.420532+00:00","updated_at":"2026-07-03T01:17:16.420532+00:00"}