{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:XHVCUF7IMFPXEIP3BDPLGKHEDB","short_pith_number":"pith:XHVCUF7I","canonical_record":{"source":{"id":"1605.07700","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-25T01:33:34Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ffb7970333f52d4ffb16016106c50ac3ecf9f9ab632c993d6f4c3fa095c15675","abstract_canon_sha256":"1579409e797e52068ceccef4fa9c85df5788a3b3d0fd1bf520ad276cb87a90d3"},"schema_version":"1.0"},"canonical_sha256":"b9ea2a17e8615f7221fb08deb328e41875037d9601c8dee0c375a7627ca24411","source":{"kind":"arxiv","id":"1605.07700","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1605.07700","created_at":"2026-05-18T01:13:39Z"},{"alias_kind":"arxiv_version","alias_value":"1605.07700v1","created_at":"2026-05-18T01:13:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.07700","created_at":"2026-05-18T01:13:39Z"},{"alias_kind":"pith_short_12","alias_value":"XHVCUF7IMFPX","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_16","alias_value":"XHVCUF7IMFPXEIP3","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_8","alias_value":"XHVCUF7I","created_at":"2026-05-18T12:30:51Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:XHVCUF7IMFPXEIP3BDPLGKHEDB","target":"record","payload":{"canonical_record":{"source":{"id":"1605.07700","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-25T01:33:34Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ffb7970333f52d4ffb16016106c50ac3ecf9f9ab632c993d6f4c3fa095c15675","abstract_canon_sha256":"1579409e797e52068ceccef4fa9c85df5788a3b3d0fd1bf520ad276cb87a90d3"},"schema_version":"1.0"},"canonical_sha256":"b9ea2a17e8615f7221fb08deb328e41875037d9601c8dee0c375a7627ca24411","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:13:39.369969Z","signature_b64":"oX5reW90jqKGanQlv3QGFEdpBhOfJn7JLYXjIYAGcTslrujIBBEsyiFnyuKuGDU/eUx7hKuFRBD9sP+grrfbBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b9ea2a17e8615f7221fb08deb328e41875037d9601c8dee0c375a7627ca24411","last_reissued_at":"2026-05-18T01:13:39.369454Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:13:39.369454Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1605.07700","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:13:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5TcbX7daEdeNeigzlClssN26+pCFC7Gp4/f6dBPav5+VSx/zxoi0t3S590SP0nuyD8yKLkPD9QGafbK+mtaRBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T13:56:35.158576Z"},"content_sha256":"208ff9f4849dfab2434aa6e2263a7997c0793fd5d3652f6dee050120cf1ba43c","schema_version":"1.0","event_id":"sha256:208ff9f4849dfab2434aa6e2263a7997c0793fd5d3652f6dee050120cf1ba43c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:XHVCUF7IMFPXEIP3BDPLGKHEDB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Purposeful Behaviour in the Absence of Rewards","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Marlos C. Machado, Michael Bowling","submitted_at":"2016-05-25T01:33:34Z","abstract_excerpt":"Artificial intelligence is commonly defined as the ability to achieve goals in the world. In the reinforcement learning framework, goals are encoded as reward functions that guide agent behaviour, and the sum of observed rewards provide a notion of progress. However, some domains have no such reward signal, or have a reward signal so sparse as to appear absent. Without reward feedback, agent behaviour is typically random, often dithering aimlessly and lacking intentionality. In this paper we present an algorithm capable of learning purposeful behaviour in the absence of rewards. The algorithm "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.07700","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:13:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tjhMq5/8RBRfzwVeUW0N6LL74TEtfCy2bT2hEA1Tkq/yHg5vJ9IAKPjwBW0wvpI0Zpuj5e0UT+MtWNe3khrIBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T13:56:35.158925Z"},"content_sha256":"19e2290cbe87cfa3536faa7a092bbc2badcff6ecfc89e25d682441ce0f3ff9e6","schema_version":"1.0","event_id":"sha256:19e2290cbe87cfa3536faa7a092bbc2badcff6ecfc89e25d682441ce0f3ff9e6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XHVCUF7IMFPXEIP3BDPLGKHEDB/bundle.json","state_url":"https://pith.science/pith/XHVCUF7IMFPXEIP3BDPLGKHEDB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XHVCUF7IMFPXEIP3BDPLGKHEDB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T13:56:35Z","links":{"resolver":"https://pith.science/pith/XHVCUF7IMFPXEIP3BDPLGKHEDB","bundle":"https://pith.science/pith/XHVCUF7IMFPXEIP3BDPLGKHEDB/bundle.json","state":"https://pith.science/pith/XHVCUF7IMFPXEIP3BDPLGKHEDB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XHVCUF7IMFPXEIP3BDPLGKHEDB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:XHVCUF7IMFPXEIP3BDPLGKHEDB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1579409e797e52068ceccef4fa9c85df5788a3b3d0fd1bf520ad276cb87a90d3","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-25T01:33:34Z","title_canon_sha256":"ffb7970333f52d4ffb16016106c50ac3ecf9f9ab632c993d6f4c3fa095c15675"},"schema_version":"1.0","source":{"id":"1605.07700","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1605.07700","created_at":"2026-05-18T01:13:39Z"},{"alias_kind":"arxiv_version","alias_value":"1605.07700v1","created_at":"2026-05-18T01:13:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.07700","created_at":"2026-05-18T01:13:39Z"},{"alias_kind":"pith_short_12","alias_value":"XHVCUF7IMFPX","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_16","alias_value":"XHVCUF7IMFPXEIP3","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_8","alias_value":"XHVCUF7I","created_at":"2026-05-18T12:30:51Z"}],"graph_snapshots":[{"event_id":"sha256:19e2290cbe87cfa3536faa7a092bbc2badcff6ecfc89e25d682441ce0f3ff9e6","target":"graph","created_at":"2026-05-18T01:13:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Artificial intelligence is commonly defined as the ability to achieve goals in the world. In the reinforcement learning framework, goals are encoded as reward functions that guide agent behaviour, and the sum of observed rewards provide a notion of progress. However, some domains have no such reward signal, or have a reward signal so sparse as to appear absent. Without reward feedback, agent behaviour is typically random, often dithering aimlessly and lacking intentionality. In this paper we present an algorithm capable of learning purposeful behaviour in the absence of rewards. The algorithm ","authors_text":"Marlos C. Machado, Michael Bowling","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-25T01:33:34Z","title":"Learning Purposeful Behaviour in the Absence of Rewards"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.07700","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:208ff9f4849dfab2434aa6e2263a7997c0793fd5d3652f6dee050120cf1ba43c","target":"record","created_at":"2026-05-18T01:13:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1579409e797e52068ceccef4fa9c85df5788a3b3d0fd1bf520ad276cb87a90d3","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-05-25T01:33:34Z","title_canon_sha256":"ffb7970333f52d4ffb16016106c50ac3ecf9f9ab632c993d6f4c3fa095c15675"},"schema_version":"1.0","source":{"id":"1605.07700","kind":"arxiv","version":1}},"canonical_sha256":"b9ea2a17e8615f7221fb08deb328e41875037d9601c8dee0c375a7627ca24411","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b9ea2a17e8615f7221fb08deb328e41875037d9601c8dee0c375a7627ca24411","first_computed_at":"2026-05-18T01:13:39.369454Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:13:39.369454Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"oX5reW90jqKGanQlv3QGFEdpBhOfJn7JLYXjIYAGcTslrujIBBEsyiFnyuKuGDU/eUx7hKuFRBD9sP+grrfbBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:13:39.369969Z","signed_message":"canonical_sha256_bytes"},"source_id":"1605.07700","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:208ff9f4849dfab2434aa6e2263a7997c0793fd5d3652f6dee050120cf1ba43c","sha256:19e2290cbe87cfa3536faa7a092bbc2badcff6ecfc89e25d682441ce0f3ff9e6"],"state_sha256":"7899a8b76f7d5bd55b8bd4b15983793932f8bac2a47d603b53d95eece886ced8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mtSxSHjt8iFwJuSapfvcQ+nkfGI8jt++UKB25Nyn/ihGgItpLrm1EUa+OvgR3Q5bBe+VUcrA7jRofaDhMwz8Cw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T13:56:35.160830Z","bundle_sha256":"4453c87f9aab8fcc330bb3e34195519ab9eed06dd51ce0c1d0265528e1a84946"}}