{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:HNLSQK6USZJ6UEEC4AGSZ6DNKK","short_pith_number":"pith:HNLSQK6U","schema_version":"1.0","canonical_sha256":"3b57282bd49653ea1082e00d2cf86d52849a0203703d299df8193631f3287e93","source":{"kind":"arxiv","id":"1811.04350","version":1},"attestation_state":"computed","paper":{"title":"Towards Governing Agent's Efficacy: Action-Conditional $\\beta$-VAE for Deep Transparent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Gyujeong Lee, John Yang, Minsung Hyun, Nojun Kwak, Simyung Chang","submitted_at":"2018-11-11T04:48:15Z","abstract_excerpt":"We tackle the blackbox issue of deep neural networks in the settings of reinforcement learning (RL) where neural agents learn towards maximizing reward gains in an uncontrollable way. Such learning approach is risky when the interacting environment includes an expanse of state space because it is then almost impossible to foresee all unwanted outcomes and penalize them with negative rewards beforehand. Unlike reverse analysis of learned neural features from previous works, our proposed method \\nj{tackles the blackbox issue by encouraging} an RL policy network to learn interpretable latent feat"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1811.04350","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-11T04:48:15Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"c8eec0c113635fdd5d93a4f1410b78cb9528582f8438d0c3608da8d92db5ce26","abstract_canon_sha256":"65a09f0117fad392e2bc6891f075d3e46f86949698a4ab5796904b04dde26694"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:05.473248Z","signature_b64":"8GKIhN2ifSzPNPLT1Vy+UUFLwy72JVzJ/1z/SPwqtWGL8N+oVLCkqf1rLUKcf3DDoc5wgu+RLRLC1rteLziTAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3b57282bd49653ea1082e00d2cf86d52849a0203703d299df8193631f3287e93","last_reissued_at":"2026-05-18T00:01:05.472656Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:05.472656Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards Governing Agent's Efficacy: Action-Conditional $\\beta$-VAE for Deep Transparent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Gyujeong Lee, John Yang, Minsung Hyun, Nojun Kwak, Simyung Chang","submitted_at":"2018-11-11T04:48:15Z","abstract_excerpt":"We tackle the blackbox issue of deep neural networks in the settings of reinforcement learning (RL) where neural agents learn towards maximizing reward gains in an uncontrollable way. Such learning approach is risky when the interacting environment includes an expanse of state space because it is then almost impossible to foresee all unwanted outcomes and penalize them with negative rewards beforehand. Unlike reverse analysis of learned neural features from previous works, our proposed method \\nj{tackles the blackbox issue by encouraging} an RL policy network to learn interpretable latent feat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.04350","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1811.04350","created_at":"2026-05-18T00:01:05.472745+00:00"},{"alias_kind":"arxiv_version","alias_value":"1811.04350v1","created_at":"2026-05-18T00:01:05.472745+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.04350","created_at":"2026-05-18T00:01:05.472745+00:00"},{"alias_kind":"pith_short_12","alias_value":"HNLSQK6USZJ6","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_16","alias_value":"HNLSQK6USZJ6UEEC","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_8","alias_value":"HNLSQK6U","created_at":"2026-05-18T12:32:28.185984+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK","json":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK.json","graph_json":"https://pith.science/api/pith-number/HNLSQK6USZJ6UEEC4AGSZ6DNKK/graph.json","events_json":"https://pith.science/api/pith-number/HNLSQK6USZJ6UEEC4AGSZ6DNKK/events.json","paper":"https://pith.science/paper/HNLSQK6U"},"agent_actions":{"view_html":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK","download_json":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK.json","view_paper":"https://pith.science/paper/HNLSQK6U","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1811.04350&json=true","fetch_graph":"https://pith.science/api/pith-number/HNLSQK6USZJ6UEEC4AGSZ6DNKK/graph.json","fetch_events":"https://pith.science/api/pith-number/HNLSQK6USZJ6UEEC4AGSZ6DNKK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK/action/storage_attestation","attest_author":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK/action/author_attestation","sign_citation":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK/action/citation_signature","submit_replication":"https://pith.science/pith/HNLSQK6USZJ6UEEC4AGSZ6DNKK/action/replication_record"}},"created_at":"2026-05-18T00:01:05.472745+00:00","updated_at":"2026-05-18T00:01:05.472745+00:00"}