{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:6UGOGP3AXIFNY35KOVFRLJJRDT","short_pith_number":"pith:6UGOGP3A","canonical_record":{"source":{"id":"1801.08757","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-01-26T11:11:18Z","cross_cats_sorted":[],"title_canon_sha256":"5c43a7166bdf4497f8d819d70e1d81ea05528b300c96fc98859b046385ee3ecf","abstract_canon_sha256":"5140949796d4acf72f21c94cf17ab59fbda0ffe045a81b530ffe8ec8d57096ec"},"schema_version":"1.0"},"canonical_sha256":"f50ce33f60ba0adc6faa754b15a5311cf1579b1fb7edc64f2d27472bd4f7c28f","source":{"kind":"arxiv","id":"1801.08757","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1801.08757","created_at":"2026-05-18T00:25:03Z"},{"alias_kind":"arxiv_version","alias_value":"1801.08757v1","created_at":"2026-05-18T00:25:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1801.08757","created_at":"2026-05-18T00:25:03Z"},{"alias_kind":"pith_short_12","alias_value":"6UGOGP3AXIFN","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_16","alias_value":"6UGOGP3AXIFNY35K","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_8","alias_value":"6UGOGP3A","created_at":"2026-05-18T12:32:11Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:6UGOGP3AXIFNY35KOVFRLJJRDT","target":"record","payload":{"canonical_record":{"source":{"id":"1801.08757","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-01-26T11:11:18Z","cross_cats_sorted":[],"title_canon_sha256":"5c43a7166bdf4497f8d819d70e1d81ea05528b300c96fc98859b046385ee3ecf","abstract_canon_sha256":"5140949796d4acf72f21c94cf17ab59fbda0ffe045a81b530ffe8ec8d57096ec"},"schema_version":"1.0"},"canonical_sha256":"f50ce33f60ba0adc6faa754b15a5311cf1579b1fb7edc64f2d27472bd4f7c28f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:25:03.096576Z","signature_b64":"wIgDkPOAKbM5gE3FHmM4eFaeNj3AYb0RWUhQbdca7bY45RgYMPKv596Zq8tzyPdHmcazoGe2QlbW+sp3z0hyCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f50ce33f60ba0adc6faa754b15a5311cf1579b1fb7edc64f2d27472bd4f7c28f","last_reissued_at":"2026-05-18T00:25:03.096154Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:25:03.096154Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1801.08757","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:25:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XBHB/qlHFI6nQW3jYYWOyunmHCcKhtJ+LE6f98hhZBYIPAvqb6wWdH152tdUEoR10XCXffqRQCGDr+2uyGiqCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T08:20:25.647066Z"},"content_sha256":"b08bd242c7021d60515ddc11b2073eadecad9978d163548704832b10d24bbbcb","schema_version":"1.0","event_id":"sha256:b08bd242c7021d60515ddc11b2073eadecad9978d163548704832b10d24bbbcb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:6UGOGP3AXIFNY35KOVFRLJJRDT","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Safe Exploration in Continuous Action Spaces","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Cosmin Paduraru, Gal Dalal, Krishnamurthy Dvijotham, Matej Vecerik, Todd Hester, Yuval Tassa","submitted_at":"2018-01-26T11:11:18Z","abstract_excerpt":"We address the problem of deploying a reinforcement learning (RL) agent on a physical system such as a datacenter cooling unit or robot, where critical constraints must never be violated. We show how to exploit the typically smooth dynamics of these systems and enable RL algorithms to never violate constraints during learning. Our technique is to directly add to the policy a safety layer that analytically solves an action correction formulation per each state. The novelty of obtaining an elegant closed-form solution is attained due to a linearized model, learned on past trajectories consisting"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1801.08757","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:25:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"k1UlBYqDuWXgR314qNz7lQTIe9faMJX4T0OxaXShhhFQJ81yEjtAY6hAddbHNwA1zvgcT8TMKrrb+h6Evr/rCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T08:20:25.647831Z"},"content_sha256":"cca5fd76db33ebefb218b710cf184737d5c92c2483fb4d7687b7aa615a6aa0ad","schema_version":"1.0","event_id":"sha256:cca5fd76db33ebefb218b710cf184737d5c92c2483fb4d7687b7aa615a6aa0ad"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6UGOGP3AXIFNY35KOVFRLJJRDT/bundle.json","state_url":"https://pith.science/pith/6UGOGP3AXIFNY35KOVFRLJJRDT/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6UGOGP3AXIFNY35KOVFRLJJRDT/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-09T08:20:25Z","links":{"resolver":"https://pith.science/pith/6UGOGP3AXIFNY35KOVFRLJJRDT","bundle":"https://pith.science/pith/6UGOGP3AXIFNY35KOVFRLJJRDT/bundle.json","state":"https://pith.science/pith/6UGOGP3AXIFNY35KOVFRLJJRDT/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6UGOGP3AXIFNY35KOVFRLJJRDT/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:6UGOGP3AXIFNY35KOVFRLJJRDT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5140949796d4acf72f21c94cf17ab59fbda0ffe045a81b530ffe8ec8d57096ec","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-01-26T11:11:18Z","title_canon_sha256":"5c43a7166bdf4497f8d819d70e1d81ea05528b300c96fc98859b046385ee3ecf"},"schema_version":"1.0","source":{"id":"1801.08757","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1801.08757","created_at":"2026-05-18T00:25:03Z"},{"alias_kind":"arxiv_version","alias_value":"1801.08757v1","created_at":"2026-05-18T00:25:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1801.08757","created_at":"2026-05-18T00:25:03Z"},{"alias_kind":"pith_short_12","alias_value":"6UGOGP3AXIFN","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_16","alias_value":"6UGOGP3AXIFNY35K","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_8","alias_value":"6UGOGP3A","created_at":"2026-05-18T12:32:11Z"}],"graph_snapshots":[{"event_id":"sha256:cca5fd76db33ebefb218b710cf184737d5c92c2483fb4d7687b7aa615a6aa0ad","target":"graph","created_at":"2026-05-18T00:25:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We address the problem of deploying a reinforcement learning (RL) agent on a physical system such as a datacenter cooling unit or robot, where critical constraints must never be violated. We show how to exploit the typically smooth dynamics of these systems and enable RL algorithms to never violate constraints during learning. Our technique is to directly add to the policy a safety layer that analytically solves an action correction formulation per each state. The novelty of obtaining an elegant closed-form solution is attained due to a linearized model, learned on past trajectories consisting","authors_text":"Cosmin Paduraru, Gal Dalal, Krishnamurthy Dvijotham, Matej Vecerik, Todd Hester, Yuval Tassa","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-01-26T11:11:18Z","title":"Safe Exploration in Continuous Action Spaces"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1801.08757","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b08bd242c7021d60515ddc11b2073eadecad9978d163548704832b10d24bbbcb","target":"record","created_at":"2026-05-18T00:25:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5140949796d4acf72f21c94cf17ab59fbda0ffe045a81b530ffe8ec8d57096ec","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-01-26T11:11:18Z","title_canon_sha256":"5c43a7166bdf4497f8d819d70e1d81ea05528b300c96fc98859b046385ee3ecf"},"schema_version":"1.0","source":{"id":"1801.08757","kind":"arxiv","version":1}},"canonical_sha256":"f50ce33f60ba0adc6faa754b15a5311cf1579b1fb7edc64f2d27472bd4f7c28f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f50ce33f60ba0adc6faa754b15a5311cf1579b1fb7edc64f2d27472bd4f7c28f","first_computed_at":"2026-05-18T00:25:03.096154Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:25:03.096154Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wIgDkPOAKbM5gE3FHmM4eFaeNj3AYb0RWUhQbdca7bY45RgYMPKv596Zq8tzyPdHmcazoGe2QlbW+sp3z0hyCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:25:03.096576Z","signed_message":"canonical_sha256_bytes"},"source_id":"1801.08757","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b08bd242c7021d60515ddc11b2073eadecad9978d163548704832b10d24bbbcb","sha256:cca5fd76db33ebefb218b710cf184737d5c92c2483fb4d7687b7aa615a6aa0ad"],"state_sha256":"62265be714fa169d6a17be5b64eacb9d126c250c9790ddf7b19c2619f9358a59"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Uwa4GUB5za8/m+Xs+7aH9uXkPSfxhSKglUI0h9RHYY1M+L8SM6cwl+Via48122Y6IKVl0Mdz01JtMLBakZStDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-09T08:20:25.651654Z","bundle_sha256":"70b26e121bd3d6abdaa41a1aeaa90b0f53ead3b69da0fdc03cb278d5ab2c2a89"}}