{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:YKJHKZZ2USNKOHJGHQSGOZ6BXS","short_pith_number":"pith:YKJHKZZ2","canonical_record":{"source":{"id":"1903.08792","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-21T01:29:14Z","cross_cats_sorted":["cs.SY","stat.ML"],"title_canon_sha256":"dae408f41e26d906431c31a8de2fa37807b3e07270a034479f7b086d2d5e1c2e","abstract_canon_sha256":"4839c3ca4080c5e4f0d76c1f34a12f7fa56401a9a4c7035da577791beb210fcd"},"schema_version":"1.0"},"canonical_sha256":"c29275673aa49aa71d263c246767c1bcbfa798f889650cc51881a7f43e40a8d9","source":{"kind":"arxiv","id":"1903.08792","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.08792","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"arxiv_version","alias_value":"1903.08792v1","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.08792","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"pith_short_12","alias_value":"YKJHKZZ2USNK","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"YKJHKZZ2USNKOHJG","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"YKJHKZZ2","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:YKJHKZZ2USNKOHJGHQSGOZ6BXS","target":"record","payload":{"canonical_record":{"source":{"id":"1903.08792","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-21T01:29:14Z","cross_cats_sorted":["cs.SY","stat.ML"],"title_canon_sha256":"dae408f41e26d906431c31a8de2fa37807b3e07270a034479f7b086d2d5e1c2e","abstract_canon_sha256":"4839c3ca4080c5e4f0d76c1f34a12f7fa56401a9a4c7035da577791beb210fcd"},"schema_version":"1.0"},"canonical_sha256":"c29275673aa49aa71d263c246767c1bcbfa798f889650cc51881a7f43e40a8d9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:43.915205Z","signature_b64":"1M4oJJXJJnth93on4wunaBdp69OE7nRDuTzX8MkNUN/e9PbbB+UMo7Lwaxmwc9ooVfzu36QPeJ2B8zRZ4t50Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c29275673aa49aa71d263c246767c1bcbfa798f889650cc51881a7f43e40a8d9","last_reissued_at":"2026-05-17T23:50:43.914815Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:43.914815Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1903.08792","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tDQV+baazUNcaUKQ2cBWdJJ9Rc6vvldyPN9jCKGysAlWV9utvRGMWTT2VF8HTnQAtURG1VZ79H1VwO/yqpM+AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T05:13:33.208096Z"},"content_sha256":"0e5495bf6ce5209f9d7680d5f6b4e81d0dafb0569565a3cf74c2cd1a6bf6131d","schema_version":"1.0","event_id":"sha256:0e5495bf6ce5209f9d7680d5f6b4e81d0dafb0569565a3cf74c2cd1a6bf6131d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:YKJHKZZ2USNKOHJGHQSGOZ6BXS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"End-to-End Safe Reinforcement Learning through Barrier Functions for Safety-Critical Continuous Control Tasks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SY","stat.ML"],"primary_cat":"cs.LG","authors_text":"Gabor Orosz, Joel W. Burdick, Richard Cheng, Richard M. Murray","submitted_at":"2019-03-21T01:29:14Z","abstract_excerpt":"Reinforcement Learning (RL) algorithms have found limited success beyond simulated applications, and one main reason is the absence of safety guarantees during the learning process. Real world systems would realistically fail or break before an optimal controller can be learned. To address this issue, we propose a controller architecture that combines (1) a model-free RL-based controller with (2) model-based controllers utilizing control barrier functions (CBFs) and (3) on-line learning of the unknown system dynamics, in order to ensure safety during learning. Our general framework leverages t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.08792","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"P55HQMIDMYfgJJXF+8IKFUpE/EeShDpa0oOFKHmrhlJH4DGp+BHsKFJFPigkMvgdz+ilXz/4Lc/9urVwGkdCAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T05:13:33.208454Z"},"content_sha256":"cdc624a288abf48e1575d8d52ae2444e0a08aeb43ad8265e44231a9fa4a18acf","schema_version":"1.0","event_id":"sha256:cdc624a288abf48e1575d8d52ae2444e0a08aeb43ad8265e44231a9fa4a18acf"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YKJHKZZ2USNKOHJGHQSGOZ6BXS/bundle.json","state_url":"https://pith.science/pith/YKJHKZZ2USNKOHJGHQSGOZ6BXS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YKJHKZZ2USNKOHJGHQSGOZ6BXS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T05:13:33Z","links":{"resolver":"https://pith.science/pith/YKJHKZZ2USNKOHJGHQSGOZ6BXS","bundle":"https://pith.science/pith/YKJHKZZ2USNKOHJGHQSGOZ6BXS/bundle.json","state":"https://pith.science/pith/YKJHKZZ2USNKOHJGHQSGOZ6BXS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YKJHKZZ2USNKOHJGHQSGOZ6BXS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:YKJHKZZ2USNKOHJGHQSGOZ6BXS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4839c3ca4080c5e4f0d76c1f34a12f7fa56401a9a4c7035da577791beb210fcd","cross_cats_sorted":["cs.SY","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-21T01:29:14Z","title_canon_sha256":"dae408f41e26d906431c31a8de2fa37807b3e07270a034479f7b086d2d5e1c2e"},"schema_version":"1.0","source":{"id":"1903.08792","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.08792","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"arxiv_version","alias_value":"1903.08792v1","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.08792","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"pith_short_12","alias_value":"YKJHKZZ2USNK","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"YKJHKZZ2USNKOHJG","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"YKJHKZZ2","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:cdc624a288abf48e1575d8d52ae2444e0a08aeb43ad8265e44231a9fa4a18acf","target":"graph","created_at":"2026-05-17T23:50:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement Learning (RL) algorithms have found limited success beyond simulated applications, and one main reason is the absence of safety guarantees during the learning process. Real world systems would realistically fail or break before an optimal controller can be learned. To address this issue, we propose a controller architecture that combines (1) a model-free RL-based controller with (2) model-based controllers utilizing control barrier functions (CBFs) and (3) on-line learning of the unknown system dynamics, in order to ensure safety during learning. Our general framework leverages t","authors_text":"Gabor Orosz, Joel W. Burdick, Richard Cheng, Richard M. Murray","cross_cats":["cs.SY","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-21T01:29:14Z","title":"End-to-End Safe Reinforcement Learning through Barrier Functions for Safety-Critical Continuous Control Tasks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.08792","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0e5495bf6ce5209f9d7680d5f6b4e81d0dafb0569565a3cf74c2cd1a6bf6131d","target":"record","created_at":"2026-05-17T23:50:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4839c3ca4080c5e4f0d76c1f34a12f7fa56401a9a4c7035da577791beb210fcd","cross_cats_sorted":["cs.SY","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-03-21T01:29:14Z","title_canon_sha256":"dae408f41e26d906431c31a8de2fa37807b3e07270a034479f7b086d2d5e1c2e"},"schema_version":"1.0","source":{"id":"1903.08792","kind":"arxiv","version":1}},"canonical_sha256":"c29275673aa49aa71d263c246767c1bcbfa798f889650cc51881a7f43e40a8d9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c29275673aa49aa71d263c246767c1bcbfa798f889650cc51881a7f43e40a8d9","first_computed_at":"2026-05-17T23:50:43.914815Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:50:43.914815Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1M4oJJXJJnth93on4wunaBdp69OE7nRDuTzX8MkNUN/e9PbbB+UMo7Lwaxmwc9ooVfzu36QPeJ2B8zRZ4t50Dw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:50:43.915205Z","signed_message":"canonical_sha256_bytes"},"source_id":"1903.08792","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0e5495bf6ce5209f9d7680d5f6b4e81d0dafb0569565a3cf74c2cd1a6bf6131d","sha256:cdc624a288abf48e1575d8d52ae2444e0a08aeb43ad8265e44231a9fa4a18acf"],"state_sha256":"bf4c43c6a2a8aa87808f5b1ab9b8be572896c6734b7cf11b1e567c1eef284990"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9zTzydvGaZEMPkMgf1QBX+kUhXm6WvWJar7tAQDGqtLhhCcDJWp5W7Zvx8g8NmvIvMruo+VuP65pbFHxIhGCDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T05:13:33.210290Z","bundle_sha256":"c9ae20e2d336d2a3f7e9812ac8170a870324f159cf4c9c29280e4c52876b2519"}}