{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:4IND3MJHI54XIRAP7JTTKIXEGN","short_pith_number":"pith:4IND3MJH","canonical_record":{"source":{"id":"2602.15206","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-16T21:36:28Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"8a71e5a02be7f9d5a6dd8bee5d66b007ce19c72369733921a4914e95886a2449","abstract_canon_sha256":"b4e1f5d2784c3d7a977354124880954075998c979e202a97c4361b5f345056d7"},"schema_version":"1.0"},"canonical_sha256":"e21a3db127477974440ffa673522e43363ce1ae38fa12ab180b71768c0b74969","source":{"kind":"arxiv","id":"2602.15206","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.15206","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"arxiv_version","alias_value":"2602.15206v2","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.15206","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"pith_short_12","alias_value":"4IND3MJHI54X","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"pith_short_16","alias_value":"4IND3MJHI54XIRAP","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"pith_short_8","alias_value":"4IND3MJH","created_at":"2026-06-23T01:12:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:4IND3MJHI54XIRAP7JTTKIXEGN","target":"record","payload":{"canonical_record":{"source":{"id":"2602.15206","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-16T21:36:28Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"8a71e5a02be7f9d5a6dd8bee5d66b007ce19c72369733921a4914e95886a2449","abstract_canon_sha256":"b4e1f5d2784c3d7a977354124880954075998c979e202a97c4361b5f345056d7"},"schema_version":"1.0"},"canonical_sha256":"e21a3db127477974440ffa673522e43363ce1ae38fa12ab180b71768c0b74969","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:12:02.071440Z","signature_b64":"vx9Q4CdW76m9mJXDIbhDFBzMBUXE+ShNm6CX06hJuTRIBKHZwKgKl/du/IdBdZTXKW5RpjH+e0G3gQLNu1ZTCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e21a3db127477974440ffa673522e43363ce1ae38fa12ab180b71768c0b74969","last_reissued_at":"2026-06-23T01:12:02.070900Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:12:02.070900Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.15206","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:12:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AFjL6QW/TvgJQ5MKF1YH0NxTyqWbKHXi5ydhmXpwl1+PgknSJ+FOajsr/XT1z45K/B5oS5CLQwjbIgyaurRoAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T19:59:27.942056Z"},"content_sha256":"9d2c6de7856556a43fbc06947c334249d947676d3f6cd37af49cc60e24363452","schema_version":"1.0","event_id":"sha256:9d2c6de7856556a43fbc06947c334249d947676d3f6cd37af49cc60e24363452"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:4IND3MJHI54XIRAP7JTTKIXEGN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MAVRL: Learning Reward Functions from Multiple Feedback Types with Amortized Variational Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Giorgia Ramponi, Maria Gkoulta, Mennatallah El-Assady, Rapha\\\"el Baur, Thomas Kleine Buening, Yannick Metz","submitted_at":"2026-02-16T21:36:28Z","abstract_excerpt":"Reward learning typically relies on a single feedback type or combines multiple feedback types using manually weighted loss terms. Currently, it remains unclear how to jointly learn reward functions from heterogeneous feedback types such as demonstrations, comparisons, ratings, and stops that provide qualitatively different signals. We address this challenge by formulating reward learning from multiple feedback types as Bayesian inference over a shared latent reward function, where each feedback type contributes information through an explicit likelihood. We introduce a scalable amortized vari"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.15206","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.15206/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:12:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UGBJk9DPMbq6W1Xpfc1ycyaO1F/NDSJw2PYYZ5s9F+KXlSqJRFTzv/86brG7z0IoPD9ySMu6NvALDNL0xjfJAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T19:59:27.942465Z"},"content_sha256":"03e69ee883f9d6563cc25ae41336f6a651a4e2f457e389ca0b72f16e02219d97","schema_version":"1.0","event_id":"sha256:03e69ee883f9d6563cc25ae41336f6a651a4e2f457e389ca0b72f16e02219d97"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4IND3MJHI54XIRAP7JTTKIXEGN/bundle.json","state_url":"https://pith.science/pith/4IND3MJHI54XIRAP7JTTKIXEGN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4IND3MJHI54XIRAP7JTTKIXEGN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T19:59:27Z","links":{"resolver":"https://pith.science/pith/4IND3MJHI54XIRAP7JTTKIXEGN","bundle":"https://pith.science/pith/4IND3MJHI54XIRAP7JTTKIXEGN/bundle.json","state":"https://pith.science/pith/4IND3MJHI54XIRAP7JTTKIXEGN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4IND3MJHI54XIRAP7JTTKIXEGN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:4IND3MJHI54XIRAP7JTTKIXEGN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b4e1f5d2784c3d7a977354124880954075998c979e202a97c4361b5f345056d7","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-16T21:36:28Z","title_canon_sha256":"8a71e5a02be7f9d5a6dd8bee5d66b007ce19c72369733921a4914e95886a2449"},"schema_version":"1.0","source":{"id":"2602.15206","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.15206","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"arxiv_version","alias_value":"2602.15206v2","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.15206","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"pith_short_12","alias_value":"4IND3MJHI54X","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"pith_short_16","alias_value":"4IND3MJHI54XIRAP","created_at":"2026-06-23T01:12:02Z"},{"alias_kind":"pith_short_8","alias_value":"4IND3MJH","created_at":"2026-06-23T01:12:02Z"}],"graph_snapshots":[{"event_id":"sha256:03e69ee883f9d6563cc25ae41336f6a651a4e2f457e389ca0b72f16e02219d97","target":"graph","created_at":"2026-06-23T01:12:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.15206/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reward learning typically relies on a single feedback type or combines multiple feedback types using manually weighted loss terms. Currently, it remains unclear how to jointly learn reward functions from heterogeneous feedback types such as demonstrations, comparisons, ratings, and stops that provide qualitatively different signals. We address this challenge by formulating reward learning from multiple feedback types as Bayesian inference over a shared latent reward function, where each feedback type contributes information through an explicit likelihood. We introduce a scalable amortized vari","authors_text":"Giorgia Ramponi, Maria Gkoulta, Mennatallah El-Assady, Rapha\\\"el Baur, Thomas Kleine Buening, Yannick Metz","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-16T21:36:28Z","title":"MAVRL: Learning Reward Functions from Multiple Feedback Types with Amortized Variational Inference"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.15206","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9d2c6de7856556a43fbc06947c334249d947676d3f6cd37af49cc60e24363452","target":"record","created_at":"2026-06-23T01:12:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b4e1f5d2784c3d7a977354124880954075998c979e202a97c4361b5f345056d7","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-16T21:36:28Z","title_canon_sha256":"8a71e5a02be7f9d5a6dd8bee5d66b007ce19c72369733921a4914e95886a2449"},"schema_version":"1.0","source":{"id":"2602.15206","kind":"arxiv","version":2}},"canonical_sha256":"e21a3db127477974440ffa673522e43363ce1ae38fa12ab180b71768c0b74969","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e21a3db127477974440ffa673522e43363ce1ae38fa12ab180b71768c0b74969","first_computed_at":"2026-06-23T01:12:02.070900Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T01:12:02.070900Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"vx9Q4CdW76m9mJXDIbhDFBzMBUXE+ShNm6CX06hJuTRIBKHZwKgKl/du/IdBdZTXKW5RpjH+e0G3gQLNu1ZTCg==","signature_status":"signed_v1","signed_at":"2026-06-23T01:12:02.071440Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.15206","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9d2c6de7856556a43fbc06947c334249d947676d3f6cd37af49cc60e24363452","sha256:03e69ee883f9d6563cc25ae41336f6a651a4e2f457e389ca0b72f16e02219d97"],"state_sha256":"5d0fa573f412c00a7270715159218039bdd68c24d9b666e86774515353fa168f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pZq6QadJ1DguZODt5k+ZmYH+M5pKOs2sbhiIFrr9UXwQ/63Q31F2sasM+Pf0vtRV9qdOM4YM68W78wOE2pqtCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T19:59:27.944496Z","bundle_sha256":"691282e47deff32c98e450b10873dc808d85f6ac47faa6a74deeb3aeef6f2fc1"}}