{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:XLK4HRTGXTL4MFLQHJW6F4SRBT","short_pith_number":"pith:XLK4HRTG","canonical_record":{"source":{"id":"1210.4893","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-16T17:47:32Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ffe054c4bd96efe7a0caa091786bf6e6e535410b9f0f7e80deafb932b3d00ff6","abstract_canon_sha256":"94538e1eccf39b8ccc79ef6f8ad8dc5d422e40e771ff0b3bb94aca68f6654a21"},"schema_version":"1.0"},"canonical_sha256":"bad5c3c666bcd7c615703a6de2f2510cd685746ac7bbde2d8a689282c5f4a624","source":{"kind":"arxiv","id":"1210.4893","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1210.4893","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"arxiv_version","alias_value":"1210.4893v1","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1210.4893","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"pith_short_12","alias_value":"XLK4HRTGXTL4","created_at":"2026-05-18T12:27:27Z"},{"alias_kind":"pith_short_16","alias_value":"XLK4HRTGXTL4MFLQ","created_at":"2026-05-18T12:27:27Z"},{"alias_kind":"pith_short_8","alias_value":"XLK4HRTG","created_at":"2026-05-18T12:27:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:XLK4HRTGXTL4MFLQHJW6F4SRBT","target":"record","payload":{"canonical_record":{"source":{"id":"1210.4893","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-16T17:47:32Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"ffe054c4bd96efe7a0caa091786bf6e6e535410b9f0f7e80deafb932b3d00ff6","abstract_canon_sha256":"94538e1eccf39b8ccc79ef6f8ad8dc5d422e40e771ff0b3bb94aca68f6654a21"},"schema_version":"1.0"},"canonical_sha256":"bad5c3c666bcd7c615703a6de2f2510cd685746ac7bbde2d8a689282c5f4a624","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:42:55.116981Z","signature_b64":"yey/YX46dqv4C+ygdrI5QXZgxB/sU3KBRv/WJYPOup+b26Aw474FMOpRP0zHRDqPP2Ik4nmHuLEYC19EypmSDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bad5c3c666bcd7c615703a6de2f2510cd685746ac7bbde2d8a689282c5f4a624","last_reissued_at":"2026-05-18T03:42:55.116255Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:42:55.116255Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1210.4893","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:42:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pjn0Y5Vq+hfgVM0IgsxglrTx2MceX/eaF1ZTS4dPxZhlN5GILGM2eXX58H+q0mGdmdKTSOPf6SY8we9jDIVFDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T23:11:44.054516Z"},"content_sha256":"b6270637446f500d76f50405bed8783b53e535bbe94e3b7871a64a27bd9a32f1","schema_version":"1.0","event_id":"sha256:b6270637446f500d76f50405bed8783b53e535bbe94e3b7871a64a27bd9a32f1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:XLK4HRTGXTL4MFLQHJW6F4SRBT","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Sparse Q-learning with Mirror Descent","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Bo Liu, Sridhar Mahadevan","submitted_at":"2012-10-16T17:47:32Z","abstract_excerpt":"This paper explores a new framework for reinforcement learning based on online convex optimization, in particular mirror descent and related algorithms. Mirror descent can be viewed as an enhanced gradient method, particularly suited to minimization of convex functions in highdimensional spaces. Unlike traditional gradient methods, mirror descent undertakes gradient updates of weights in both the dual space and primal space, which are linked together using a Legendre transform. Mirror descent can be viewed as a proximal algorithm where the distance generating function used is a Bregman diverge"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1210.4893","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:42:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"N9zwrc5bTw3WymXneGFC5thnzx+w/VbX3nZIJlRJxjmMV2Yvm5/NwWXVuGIi8zBnWlXSpJbGtVE3Lt4f0JJpAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T23:11:44.055357Z"},"content_sha256":"9c214a20e14f2135d476924bf0963d0d7cb572e8c387006bc870b44ada712fcf","schema_version":"1.0","event_id":"sha256:9c214a20e14f2135d476924bf0963d0d7cb572e8c387006bc870b44ada712fcf"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XLK4HRTGXTL4MFLQHJW6F4SRBT/bundle.json","state_url":"https://pith.science/pith/XLK4HRTGXTL4MFLQHJW6F4SRBT/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XLK4HRTGXTL4MFLQHJW6F4SRBT/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T23:11:44Z","links":{"resolver":"https://pith.science/pith/XLK4HRTGXTL4MFLQHJW6F4SRBT","bundle":"https://pith.science/pith/XLK4HRTGXTL4MFLQHJW6F4SRBT/bundle.json","state":"https://pith.science/pith/XLK4HRTGXTL4MFLQHJW6F4SRBT/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XLK4HRTGXTL4MFLQHJW6F4SRBT/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:XLK4HRTGXTL4MFLQHJW6F4SRBT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"94538e1eccf39b8ccc79ef6f8ad8dc5d422e40e771ff0b3bb94aca68f6654a21","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-16T17:47:32Z","title_canon_sha256":"ffe054c4bd96efe7a0caa091786bf6e6e535410b9f0f7e80deafb932b3d00ff6"},"schema_version":"1.0","source":{"id":"1210.4893","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1210.4893","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"arxiv_version","alias_value":"1210.4893v1","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1210.4893","created_at":"2026-05-18T03:42:55Z"},{"alias_kind":"pith_short_12","alias_value":"XLK4HRTGXTL4","created_at":"2026-05-18T12:27:27Z"},{"alias_kind":"pith_short_16","alias_value":"XLK4HRTGXTL4MFLQ","created_at":"2026-05-18T12:27:27Z"},{"alias_kind":"pith_short_8","alias_value":"XLK4HRTG","created_at":"2026-05-18T12:27:27Z"}],"graph_snapshots":[{"event_id":"sha256:9c214a20e14f2135d476924bf0963d0d7cb572e8c387006bc870b44ada712fcf","target":"graph","created_at":"2026-05-18T03:42:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper explores a new framework for reinforcement learning based on online convex optimization, in particular mirror descent and related algorithms. Mirror descent can be viewed as an enhanced gradient method, particularly suited to minimization of convex functions in highdimensional spaces. Unlike traditional gradient methods, mirror descent undertakes gradient updates of weights in both the dual space and primal space, which are linked together using a Legendre transform. Mirror descent can be viewed as a proximal algorithm where the distance generating function used is a Bregman diverge","authors_text":"Bo Liu, Sridhar Mahadevan","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-16T17:47:32Z","title":"Sparse Q-learning with Mirror Descent"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1210.4893","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b6270637446f500d76f50405bed8783b53e535bbe94e3b7871a64a27bd9a32f1","target":"record","created_at":"2026-05-18T03:42:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"94538e1eccf39b8ccc79ef6f8ad8dc5d422e40e771ff0b3bb94aca68f6654a21","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-10-16T17:47:32Z","title_canon_sha256":"ffe054c4bd96efe7a0caa091786bf6e6e535410b9f0f7e80deafb932b3d00ff6"},"schema_version":"1.0","source":{"id":"1210.4893","kind":"arxiv","version":1}},"canonical_sha256":"bad5c3c666bcd7c615703a6de2f2510cd685746ac7bbde2d8a689282c5f4a624","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bad5c3c666bcd7c615703a6de2f2510cd685746ac7bbde2d8a689282c5f4a624","first_computed_at":"2026-05-18T03:42:55.116255Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:42:55.116255Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yey/YX46dqv4C+ygdrI5QXZgxB/sU3KBRv/WJYPOup+b26Aw474FMOpRP0zHRDqPP2Ik4nmHuLEYC19EypmSDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:42:55.116981Z","signed_message":"canonical_sha256_bytes"},"source_id":"1210.4893","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b6270637446f500d76f50405bed8783b53e535bbe94e3b7871a64a27bd9a32f1","sha256:9c214a20e14f2135d476924bf0963d0d7cb572e8c387006bc870b44ada712fcf"],"state_sha256":"0e32d8f0b38cd83d319f2a5be4df8d5f82248a62e8d46b19ef48f2cb80c38769"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"COrEDEn2J+xZRf9PbI+V65nBbz6lxejulXDDekquCXgPDaSZSND5wuHrJQLlUuFwhgGUXRtB5JhDzDNxUBNqDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T23:11:44.060633Z","bundle_sha256":"0fc8e02e2918a26b593bf974621b2682c044058f865e4a903309dd6fd3e76f21"}}