{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:6YLHX4CKHOG7OJ3AH7X6QVSMKM","short_pith_number":"pith:6YLHX4CK","schema_version":"1.0","canonical_sha256":"f6167bf04a3b8df727603fefe8564c5321b9e7860ed4d5c88543f196f3dee44f","source":{"kind":"arxiv","id":"2606.04300","version":1},"attestation_state":"computed","paper":{"title":"Argus-Retriever: Vision-LLM Late-Interaction Retrieval with Region-Aware Query-Conditioned MoE for Visual Document Retrieval","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Abdelrahman Abdallah, Adam Jatowt, Mahmoud Abdalla, Mohammed Ali","submitted_at":"2026-06-03T00:08:44Z","abstract_excerpt":"Late-interaction vision-language retrievers represent each document page as many visual token embeddings and score queries with MaxSim. In systems such as ColPali, ColQwen, ColNomic, and Nemotron ColEmbed, the document embeddings are produced without seeing the query, so the same page is represented identically for a table lookup, a chart question, and a layout-sensitive evidence request. We introduce \\textbf{Argus}, a family of query-conditioned late-interaction retrievers built on Qwen3.5-VL. Argus adds a region-aware Mixture-of-Experts module: the query encoder produces both retrieval embed"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.04300","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-06-03T00:08:44Z","cross_cats_sorted":[],"title_canon_sha256":"02619456bbbd8616cd2607326598fd9107a06b3885889a10e2b6191ad2fe4c34","abstract_canon_sha256":"5166a45a2828564a0a3955123fc8153baaa3317f2b57055a78c17e06094a0f31"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-04T01:09:02.519822Z","signature_b64":"zrTDBP0UR1e4HIs9p88D+yp5o8FSxwRy4c0RtnuYYN26ycyL/KYhY7jcneYxLGbUc+27dbU93RQ7odP84+V2Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f6167bf04a3b8df727603fefe8564c5321b9e7860ed4d5c88543f196f3dee44f","last_reissued_at":"2026-06-04T01:09:02.519142Z","signature_status":"signed_v1","first_computed_at":"2026-06-04T01:09:02.519142Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Argus-Retriever: Vision-LLM Late-Interaction Retrieval with Region-Aware Query-Conditioned MoE for Visual Document Retrieval","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Abdelrahman Abdallah, Adam Jatowt, Mahmoud Abdalla, Mohammed Ali","submitted_at":"2026-06-03T00:08:44Z","abstract_excerpt":"Late-interaction vision-language retrievers represent each document page as many visual token embeddings and score queries with MaxSim. In systems such as ColPali, ColQwen, ColNomic, and Nemotron ColEmbed, the document embeddings are produced without seeing the query, so the same page is represented identically for a table lookup, a chart question, and a layout-sensitive evidence request. We introduce \\textbf{Argus}, a family of query-conditioned late-interaction retrievers built on Qwen3.5-VL. Argus adds a region-aware Mixture-of-Experts module: the query encoder produces both retrieval embed"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.04300","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.04300/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.04300","created_at":"2026-06-04T01:09:02.519252+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.04300v1","created_at":"2026-06-04T01:09:02.519252+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.04300","created_at":"2026-06-04T01:09:02.519252+00:00"},{"alias_kind":"pith_short_12","alias_value":"6YLHX4CKHOG7","created_at":"2026-06-04T01:09:02.519252+00:00"},{"alias_kind":"pith_short_16","alias_value":"6YLHX4CKHOG7OJ3A","created_at":"2026-06-04T01:09:02.519252+00:00"},{"alias_kind":"pith_short_8","alias_value":"6YLHX4CK","created_at":"2026-06-04T01:09:02.519252+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM","json":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM.json","graph_json":"https://pith.science/api/pith-number/6YLHX4CKHOG7OJ3AH7X6QVSMKM/graph.json","events_json":"https://pith.science/api/pith-number/6YLHX4CKHOG7OJ3AH7X6QVSMKM/events.json","paper":"https://pith.science/paper/6YLHX4CK"},"agent_actions":{"view_html":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM","download_json":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM.json","view_paper":"https://pith.science/paper/6YLHX4CK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.04300&json=true","fetch_graph":"https://pith.science/api/pith-number/6YLHX4CKHOG7OJ3AH7X6QVSMKM/graph.json","fetch_events":"https://pith.science/api/pith-number/6YLHX4CKHOG7OJ3AH7X6QVSMKM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM/action/storage_attestation","attest_author":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM/action/author_attestation","sign_citation":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM/action/citation_signature","submit_replication":"https://pith.science/pith/6YLHX4CKHOG7OJ3AH7X6QVSMKM/action/replication_record"}},"created_at":"2026-06-04T01:09:02.519252+00:00","updated_at":"2026-06-04T01:09:02.519252+00:00"}