{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:Z43LXKWPAMDLOHNY4PQIQY6JLV","short_pith_number":"pith:Z43LXKWP","schema_version":"1.0","canonical_sha256":"cf36bbaacf0306b71db8e3e08863c95d6d665df4fce51a86f60fe5cac9432b80","source":{"kind":"arxiv","id":"2605.11021","version":2},"attestation_state":"computed","paper":{"title":"A Switching System Theory of Q-Learning with Linear Function Approximation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"The mean dynamics of Q-learning with linear function approximation are exactly equivalent to a linear switched system whose stability determines convergence.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Donghwan Lee, Han-Dong Lim","submitted_at":"2026-05-10T16:21:31Z","abstract_excerpt":"This paper develops a switching-system interpretation of Q-learning with linear function approximation (LFA) based on the joint spectral radius (JSR). We derive an exact linear switched model for the mean dynamics and relate convergence to stability of the corresponding switched system. The same construction is then used for stochastic linear Q-learning with independent and identically distributed (i.i.d.) observations and with Markovian observations. Although exact JSR computation is difficult in general, the certificate captures products of switching modes and can be less conservative than o"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.11021","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-10T16:21:31Z","cross_cats_sorted":[],"title_canon_sha256":"1367a8f9dfe7deeb7b6cda9fc40c07ae986d30178b072d47d5f2fd1ecdc94c61","abstract_canon_sha256":"50e911934cfb9d76dc0b2679b68867559464df43c129533a94dbdd78c07fee9c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:16.523167Z","signature_b64":"e+6q1gk2IbGp7bW84tBBG6n0UsRbAng4Z8QGQwSFubzROvbN66+RDeI749PNkbBQHW2r05tdmZSmKhyzHXElBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cf36bbaacf0306b71db8e3e08863c95d6d665df4fce51a86f60fe5cac9432b80","last_reissued_at":"2026-05-20T01:05:16.522625Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:16.522625Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Switching System Theory of Q-Learning with Linear Function Approximation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"The mean dynamics of Q-learning with linear function approximation are exactly equivalent to a linear switched system whose stability determines convergence.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Donghwan Lee, Han-Dong Lim","submitted_at":"2026-05-10T16:21:31Z","abstract_excerpt":"This paper develops a switching-system interpretation of Q-learning with linear function approximation (LFA) based on the joint spectral radius (JSR). We derive an exact linear switched model for the mean dynamics and relate convergence to stability of the corresponding switched system. The same construction is then used for stochastic linear Q-learning with independent and identically distributed (i.i.d.) observations and with Markovian observations. Although exact JSR computation is difficult in general, the certificate captures products of switching modes and can be less conservative than o"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We derive an exact linear switched model for the mean dynamics and relate convergence to stability of the corresponding switched system.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the mean dynamics of Q-learning with linear function approximation admit an exact representation as a finite set of linear switching modes whose joint spectral radius governs convergence.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Q-learning with linear function approximation is recast as a switched linear system whose mean dynamics converge precisely when the joint spectral radius of the switching matrices is less than one.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"The mean dynamics of Q-learning with linear function approximation are exactly equivalent to a linear switched system whose stability determines convergence.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a247039adf7c2c82490ccff4e02adeb57ca275eaec16da47124a3be6fb0711bd"},"source":{"id":"2605.11021","kind":"arxiv","version":2},"verdict":{"id":"4ea7ee26-af9d-4ea6-a95b-6ba0ed2d5db3","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-13T06:10:24.513147Z","strongest_claim":"We derive an exact linear switched model for the mean dynamics and relate convergence to stability of the corresponding switched system.","one_line_summary":"Q-learning with linear function approximation is recast as a switched linear system whose mean dynamics converge precisely when the joint spectral radius of the switching matrices is less than one.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the mean dynamics of Q-learning with linear function approximation admit an exact representation as a finite set of linear switching modes whose joint spectral radius governs convergence.","pith_extraction_headline":"The mean dynamics of Q-learning with linear function approximation are exactly equivalent to a linear switched system whose stability determines convergence."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.11021/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T16:38:37.979452Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T12:31:18.615231Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T10:01:55.864039Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"d945f3abaec74bf6fcc6b26cb0613bdcf3d7b3efd19ea009b0fc4674ea76b303"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"fa7fc3100520dbfff4d4799d0c1459456edb08313e192ec1c9de9bff4712aadc"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.11021","created_at":"2026-05-20T01:05:16.522705+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.11021v2","created_at":"2026-05-20T01:05:16.522705+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.11021","created_at":"2026-05-20T01:05:16.522705+00:00"},{"alias_kind":"pith_short_12","alias_value":"Z43LXKWPAMDL","created_at":"2026-05-20T01:05:16.522705+00:00"},{"alias_kind":"pith_short_16","alias_value":"Z43LXKWPAMDLOHNY","created_at":"2026-05-20T01:05:16.522705+00:00"},{"alias_kind":"pith_short_8","alias_value":"Z43LXKWP","created_at":"2026-05-20T01:05:16.522705+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2606.10835","citing_title":"Geometrically Averaged Hard Target Updates for Linear Q-Learning","ref_index":16,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV","json":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV.json","graph_json":"https://pith.science/api/pith-number/Z43LXKWPAMDLOHNY4PQIQY6JLV/graph.json","events_json":"https://pith.science/api/pith-number/Z43LXKWPAMDLOHNY4PQIQY6JLV/events.json","paper":"https://pith.science/paper/Z43LXKWP"},"agent_actions":{"view_html":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV","download_json":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV.json","view_paper":"https://pith.science/paper/Z43LXKWP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.11021&json=true","fetch_graph":"https://pith.science/api/pith-number/Z43LXKWPAMDLOHNY4PQIQY6JLV/graph.json","fetch_events":"https://pith.science/api/pith-number/Z43LXKWPAMDLOHNY4PQIQY6JLV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV/action/storage_attestation","attest_author":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV/action/author_attestation","sign_citation":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV/action/citation_signature","submit_replication":"https://pith.science/pith/Z43LXKWPAMDLOHNY4PQIQY6JLV/action/replication_record"}},"created_at":"2026-05-20T01:05:16.522705+00:00","updated_at":"2026-05-20T01:05:16.522705+00:00"}