{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:GRANV7HWYFVIQT53MFZ5HMNIIU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e8af9f08c98f036e912b083a8c87615676be3f8302944b1f25ecc00f3737d5fa","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-07-01T08:57:25Z","title_canon_sha256":"62f62f77e0e01419c181e94ba871140f46c79ea431829ef3f426bb00302a031b"},"schema_version":"1.0","source":{"id":"2607.00642","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2607.00642","created_at":"2026-07-02T01:17:50Z"},{"alias_kind":"arxiv_version","alias_value":"2607.00642v1","created_at":"2026-07-02T01:17:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.00642","created_at":"2026-07-02T01:17:50Z"},{"alias_kind":"pith_short_12","alias_value":"GRANV7HWYFVI","created_at":"2026-07-02T01:17:50Z"},{"alias_kind":"pith_short_16","alias_value":"GRANV7HWYFVIQT53","created_at":"2026-07-02T01:17:50Z"},{"alias_kind":"pith_short_8","alias_value":"GRANV7HW","created_at":"2026-07-02T01:17:50Z"}],"graph_snapshots":[{"event_id":"sha256:9285d76d0d838c965cd37715aeff00b5cc4ebea9d0c068becb3dbf1d152732ee","target":"graph","created_at":"2026-07-02T01:17:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2607.00642/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning has proven to be a valuable tool in the creation of advanced AI and robotic systems, contributing to everything from game playing to robotics to foundation models. Through trial-and-error, these AI systems typically learn one, near-optimal behavior to solve their tasks. However, there are many use cases in which one would like to assert some level of control, preferably in real time, over how the task is solved. We refer to these modifications of a core task as styles. We combine universal value function approximators (UVFAs) with carefully selected training scenarios, l","authors_text":"(2) Sony AI, (3) Sony AI, Akanksha Saran (2), Alisa Devlic (1), Andreanne Lemay (2), Craig Sherstan (3), Daniel Hernandez (2), Declan Oller (2), Dustin R. Morrill (2), Elahe Aghapour (2), Fatima Davelouis (2), Florian Fuchs (1), Francesco Riccio (1), G. Zacharias Holland (2), Harm van Seijen (2), Ishan Durugkar (2), Jaden B. Travnik (2), James A. MacGlashan (2), Japan), Johannes G\\\"unther (2), Josh Davidson (2), Kaushik Subramanian (1), Kenta Kawamoto (3), Kevin Waugh (2), Kizza N. Frisbee (2), Mady Govil (2), Maxwell Svetlik (2), Michael D. Thomure (2), Michael Spranger (3), Neil Burch (2), Nolan D. Bard (2), North America, Patrick MacAlpine (2), Peter R. Wurman (2) ((1) Sony AI, Peter Stone (2), Raksha Kumaraswamy (2), Roberto Capobianco (1), Sahil Jain (2), Samuel Barrett (2), Shruti Mishra (1), Siddhant Gangapurwala (2), Switzerland, Takuma Seno (3), Thomas J. Walsh (2), Tokyo, various locations, Varun R. Kompella (2), Yunshu Du (2), Zurich","cross_cats":["cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-07-01T08:57:25Z","title":"Coachable agents for interactive gameplay"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.00642","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:346d658428e7b1953b7f18c36ec110c639fc6e0e4fdd5ab6fe91ab741b040e48","target":"record","created_at":"2026-07-02T01:17:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e8af9f08c98f036e912b083a8c87615676be3f8302944b1f25ecc00f3737d5fa","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-07-01T08:57:25Z","title_canon_sha256":"62f62f77e0e01419c181e94ba871140f46c79ea431829ef3f426bb00302a031b"},"schema_version":"1.0","source":{"id":"2607.00642","kind":"arxiv","version":1}},"canonical_sha256":"3440dafcf6c16a884fbb6173d3b1a845329dd76905c91665eb2bc4b8ac066119","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3440dafcf6c16a884fbb6173d3b1a845329dd76905c91665eb2bc4b8ac066119","first_computed_at":"2026-07-02T01:17:50.206689Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-02T01:17:50.206689Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"4jPikraZ2iskgXJPGkBV3o91U9U0Q3UAVAN05gIAT9NLhuxwOXUBn06SgPv5jjZ0TnrSMcy4RINQg+10wImVCw==","signature_status":"signed_v1","signed_at":"2026-07-02T01:17:50.207124Z","signed_message":"canonical_sha256_bytes"},"source_id":"2607.00642","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:346d658428e7b1953b7f18c36ec110c639fc6e0e4fdd5ab6fe91ab741b040e48","sha256:9285d76d0d838c965cd37715aeff00b5cc4ebea9d0c068becb3dbf1d152732ee"],"state_sha256":"cdb59325a293f0054033a00d5fbb91bea23002f68d5001d758cbe206a256204a"}