{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:UADTXLBBSXLR4IWONT5IMJPLT5","short_pith_number":"pith:UADTXLBB","schema_version":"1.0","canonical_sha256":"a0073bac2195d71e22ce6cfa8625eb9f44d38cd0a902a19698a1bc2fba4f7029","source":{"kind":"arxiv","id":"1809.08925","version":1},"attestation_state":"computed","paper":{"title":"Constrained Exploration and Recovery from Experience Shaping","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Asim Munawar, Don Joven Agravante, Giovanni De Magistris, Ryuki Tachibana, Subhajit Chaudhury, Tu-Hoa Pham","submitted_at":"2018-09-21T06:11:11Z","abstract_excerpt":"We consider the problem of reinforcement learning under safety requirements, in which an agent is trained to complete a given task, typically formalized as the maximization of a reward signal over time, while concurrently avoiding undesirable actions or states, associated to lower rewards, or penalties. The construction and balancing of different reward components can be difficult in the presence of multiple objectives, yet is crucial for producing a satisfying policy. For example, in reaching a target while avoiding obstacles, low collision penalties can lead to reckless movements while high "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1809.08925","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-21T06:11:11Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"db1063ee1b37763200a7041f18e596d12219b1a6e560e741c544eaf3c24bc41c","abstract_canon_sha256":"0cffacd7f8de9ee8486cfe59d41d437833ef2e727cd7d17a734afe5e6ea74739"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:05:03.153965Z","signature_b64":"31XZuco7bnrfWvTCwKJZVM/2vm2YGHgOkhcOVWvOSPSYI0kneFVltzjTlopMib7O1xvIbTbOMSeGKOwQMSgZCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a0073bac2195d71e22ce6cfa8625eb9f44d38cd0a902a19698a1bc2fba4f7029","last_reissued_at":"2026-05-18T00:05:03.153518Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:05:03.153518Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Constrained Exploration and Recovery from Experience Shaping","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Asim Munawar, Don Joven Agravante, Giovanni De Magistris, Ryuki Tachibana, Subhajit Chaudhury, Tu-Hoa Pham","submitted_at":"2018-09-21T06:11:11Z","abstract_excerpt":"We consider the problem of reinforcement learning under safety requirements, in which an agent is trained to complete a given task, typically formalized as the maximization of a reward signal over time, while concurrently avoiding undesirable actions or states, associated to lower rewards, or penalties. The construction and balancing of different reward components can be difficult in the presence of multiple objectives, yet is crucial for producing a satisfying policy. For example, in reaching a target while avoiding obstacles, low collision penalties can lead to reckless movements while high "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.08925","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1809.08925","created_at":"2026-05-18T00:05:03.153594+00:00"},{"alias_kind":"arxiv_version","alias_value":"1809.08925v1","created_at":"2026-05-18T00:05:03.153594+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.08925","created_at":"2026-05-18T00:05:03.153594+00:00"},{"alias_kind":"pith_short_12","alias_value":"UADTXLBBSXLR","created_at":"2026-05-18T12:32:56.356000+00:00"},{"alias_kind":"pith_short_16","alias_value":"UADTXLBBSXLR4IWO","created_at":"2026-05-18T12:32:56.356000+00:00"},{"alias_kind":"pith_short_8","alias_value":"UADTXLBB","created_at":"2026-05-18T12:32:56.356000+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5","json":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5.json","graph_json":"https://pith.science/api/pith-number/UADTXLBBSXLR4IWONT5IMJPLT5/graph.json","events_json":"https://pith.science/api/pith-number/UADTXLBBSXLR4IWONT5IMJPLT5/events.json","paper":"https://pith.science/paper/UADTXLBB"},"agent_actions":{"view_html":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5","download_json":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5.json","view_paper":"https://pith.science/paper/UADTXLBB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1809.08925&json=true","fetch_graph":"https://pith.science/api/pith-number/UADTXLBBSXLR4IWONT5IMJPLT5/graph.json","fetch_events":"https://pith.science/api/pith-number/UADTXLBBSXLR4IWONT5IMJPLT5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5/action/storage_attestation","attest_author":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5/action/author_attestation","sign_citation":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5/action/citation_signature","submit_replication":"https://pith.science/pith/UADTXLBBSXLR4IWONT5IMJPLT5/action/replication_record"}},"created_at":"2026-05-18T00:05:03.153594+00:00","updated_at":"2026-05-18T00:05:03.153594+00:00"}