{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:4FMPN5N6VZ6FLX7NF37KVTPMQC","short_pith_number":"pith:4FMPN5N6","canonical_record":{"source":{"id":"2605.12682","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T19:32:10Z","cross_cats_sorted":[],"title_canon_sha256":"ced1e3e3b5d684046218a072f3657eb550a60cd2c52d1fd2291f19383b884340","abstract_canon_sha256":"ce38683c9821fecc4e2c6f4a527cb6bfeb40ad2037bf24756058168346ddcb78"},"schema_version":"1.0"},"canonical_sha256":"e158f6f5beae7c55dfed2efeaacdec80a46b15c1022bd734bddfb03da611998a","source":{"kind":"arxiv","id":"2605.12682","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12682","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12682v1","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12682","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"4FMPN5N6VZ6F","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"4FMPN5N6VZ6FLX7N","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"4FMPN5N6","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:4FMPN5N6VZ6FLX7NF37KVTPMQC","target":"record","payload":{"canonical_record":{"source":{"id":"2605.12682","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T19:32:10Z","cross_cats_sorted":[],"title_canon_sha256":"ced1e3e3b5d684046218a072f3657eb550a60cd2c52d1fd2291f19383b884340","abstract_canon_sha256":"ce38683c9821fecc4e2c6f4a527cb6bfeb40ad2037bf24756058168346ddcb78"},"schema_version":"1.0"},"canonical_sha256":"e158f6f5beae7c55dfed2efeaacdec80a46b15c1022bd734bddfb03da611998a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:49.985773Z","signature_b64":"U3SvjnK3t+hPYV1E7ZdWQ/pr8E8SN46ErNTWkiZmbYfAVzxfVmOv2Scqg1rgV9GGYARtRB4oiXLoaWoaAmVmCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e158f6f5beae7c55dfed2efeaacdec80a46b15c1022bd734bddfb03da611998a","last_reissued_at":"2026-05-18T03:09:49.985123Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:49.985123Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.12682","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aK9Xy0ROohpHXS3SpE3yemacY1EYtRezaFwELIkB/pVtg43wpoL7urLZvtzvnZuFTL47+VZ+hQiJ6xSucgItDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-26T15:12:49.843081Z"},"content_sha256":"80b309559ca1e9aecc8c8d6e3a17d0fdd6b03a02c334923bacc57f7c58cf20c2","schema_version":"1.0","event_id":"sha256:80b309559ca1e9aecc8c8d6e3a17d0fdd6b03a02c334923bacc57f7c58cf20c2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:4FMPN5N6VZ6FLX7NF37KVTPMQC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Transferable Latent User Preferences for Human-Aligned Decision Making","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"CLIPR learns transferable natural language rules from minimal conversations to align LLM decisions with latent user preferences.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Alina Hyk, Sandhya Saisubramanian","submitted_at":"2026-05-12T19:32:10Z","abstract_excerpt":"Large language models (LLMs) are increasingly used as reasoning modules in many applications. While they are efficient in certain tasks, LLMs often struggle to produce human-aligned solutions. Human-aligned decision making requires accounting for both explicitly stated goals and latent user preferences that shape how ambiguous situations should be resolved. Existing approaches to incorporating such preferences either rely on extensive and repeated user interactions or fail to generalize latent preferences across tasks and contexts, limiting their practical applicability. We consider a setting "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We introduce CLIPR ... a framework that learns actionable, transferable natural language rules that represent latent user preferences from minimal conversational input. These rules are iteratively refined through adaptive feedback and applied to both in-distribution and out-of-distribution ambiguous tasks across multiple environments. Evaluations on three datasets and a user study show that CLIPR consistently outperforms existing methods in improving alignment and reducing inference costs.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the natural language rules extracted from limited conversations are sufficiently transferable and actionable to guide downstream decision making across in- and out-of-distribution tasks without introducing new misalignment or requiring extensive validation.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"CLIPR learns transferable natural language rules for latent user preferences from minimal conversational input to improve LLM alignment in decision making and outperforms prior methods on three datasets plus a user study.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"CLIPR learns transferable natural language rules from minimal conversations to align LLM decisions with latent user preferences.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"9919234b654b097639f497a0a2e74cfdd52b5693f19b0fb6fb4e2954495d3407"},"source":{"id":"2605.12682","kind":"arxiv","version":1},"verdict":{"id":"5943deb5-c482-46ab-8b64-222f52b3d008","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T20:21:03.374271Z","strongest_claim":"We introduce CLIPR ... a framework that learns actionable, transferable natural language rules that represent latent user preferences from minimal conversational input. These rules are iteratively refined through adaptive feedback and applied to both in-distribution and out-of-distribution ambiguous tasks across multiple environments. Evaluations on three datasets and a user study show that CLIPR consistently outperforms existing methods in improving alignment and reducing inference costs.","one_line_summary":"CLIPR learns transferable natural language rules for latent user preferences from minimal conversational input to improve LLM alignment in decision making and outperforms prior methods on three datasets plus a user study.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the natural language rules extracted from limited conversations are sufficiently transferable and actionable to guide downstream decision making across in- and out-of-distribution tasks without introducing new misalignment or requiring extensive validation.","pith_extraction_headline":"CLIPR learns transferable natural language rules from minimal conversations to align LLM decisions with latent user preferences."},"references":{"count":29,"sample":[{"doi":"","year":null,"title":"You have at most {max_msg} messages with the user","work_id":"2ae91a0e-f541-4e49-965e-7aecfd4836a6","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"When you have learned enough, end your response with \"PAUSE: true\". Be conversational but efficient. Filled-in example (excerpt). You are a moderator learning user preferences for a kitchen and home r","work_id":"af65032a-3fcc-421b-a8b7-fb6090685dc5","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Pour sparkling water","work_id":"b1cbee28-94d4-437d-8212-6af95fa7ea90","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Bring me a snack","work_id":"4c89202a-9ba1-4304-84f9-bec8a95a16f1","ref_index":6,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"8 additional training scenarios ...] Rules for this conversation:","work_id":"f1e4f0eb-b615-4898-a83d-841a9960507b","ref_index":8,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":29,"snapshot_sha256":"cfe86e0159343b260086008c09f7f76f6443ef0f184e8aaa66c997817e23a084","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"5943deb5-c482-46ab-8b64-222f52b3d008"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m2l4rPubwdJwmHOU8ZA3/e2dmPW5CBhSQlKRI3ZSKr6woej1cKwt6zhArGeLrgepobAjWsgHa+1zx/fBlSFXBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-26T15:12:49.843629Z"},"content_sha256":"de5dea35e9c6e8d76099a488558f04eb9c43a3fdaa557800f27dd30b908faac8","schema_version":"1.0","event_id":"sha256:de5dea35e9c6e8d76099a488558f04eb9c43a3fdaa557800f27dd30b908faac8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4FMPN5N6VZ6FLX7NF37KVTPMQC/bundle.json","state_url":"https://pith.science/pith/4FMPN5N6VZ6FLX7NF37KVTPMQC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4FMPN5N6VZ6FLX7NF37KVTPMQC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-26T15:12:49Z","links":{"resolver":"https://pith.science/pith/4FMPN5N6VZ6FLX7NF37KVTPMQC","bundle":"https://pith.science/pith/4FMPN5N6VZ6FLX7NF37KVTPMQC/bundle.json","state":"https://pith.science/pith/4FMPN5N6VZ6FLX7NF37KVTPMQC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4FMPN5N6VZ6FLX7NF37KVTPMQC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:4FMPN5N6VZ6FLX7NF37KVTPMQC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ce38683c9821fecc4e2c6f4a527cb6bfeb40ad2037bf24756058168346ddcb78","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T19:32:10Z","title_canon_sha256":"ced1e3e3b5d684046218a072f3657eb550a60cd2c52d1fd2291f19383b884340"},"schema_version":"1.0","source":{"id":"2605.12682","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12682","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12682v1","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12682","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"4FMPN5N6VZ6F","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"4FMPN5N6VZ6FLX7N","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"4FMPN5N6","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:de5dea35e9c6e8d76099a488558f04eb9c43a3fdaa557800f27dd30b908faac8","target":"graph","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We introduce CLIPR ... a framework that learns actionable, transferable natural language rules that represent latent user preferences from minimal conversational input. These rules are iteratively refined through adaptive feedback and applied to both in-distribution and out-of-distribution ambiguous tasks across multiple environments. Evaluations on three datasets and a user study show that CLIPR consistently outperforms existing methods in improving alignment and reducing inference costs."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the natural language rules extracted from limited conversations are sufficiently transferable and actionable to guide downstream decision making across in- and out-of-distribution tasks without introducing new misalignment or requiring extensive validation."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"CLIPR learns transferable natural language rules for latent user preferences from minimal conversational input to improve LLM alignment in decision making and outperforms prior methods on three datasets plus a user study."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"CLIPR learns transferable natural language rules from minimal conversations to align LLM decisions with latent user preferences."}],"snapshot_sha256":"9919234b654b097639f497a0a2e74cfdd52b5693f19b0fb6fb4e2954495d3407"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Large language models (LLMs) are increasingly used as reasoning modules in many applications. While they are efficient in certain tasks, LLMs often struggle to produce human-aligned solutions. Human-aligned decision making requires accounting for both explicitly stated goals and latent user preferences that shape how ambiguous situations should be resolved. Existing approaches to incorporating such preferences either rely on extensive and repeated user interactions or fail to generalize latent preferences across tasks and contexts, limiting their practical applicability. We consider a setting ","authors_text":"Alina Hyk, Sandhya Saisubramanian","cross_cats":[],"headline":"CLIPR learns transferable natural language rules from minimal conversations to align LLM decisions with latent user preferences.","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T19:32:10Z","title":"Learning Transferable Latent User Preferences for Human-Aligned Decision Making"},"references":{"count":29,"internal_anchors":0,"resolved_work":29,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"You have at most {max_msg} messages with the user","work_id":"2ae91a0e-f541-4e49-965e-7aecfd4836a6","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"When you have learned enough, end your response with \"PAUSE: true\". Be conversational but efficient. Filled-in example (excerpt). You are a moderator learning user preferences for a kitchen and home r","work_id":"af65032a-3fcc-421b-a8b7-fb6090685dc5","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Pour sparkling water","work_id":"b1cbee28-94d4-437d-8212-6af95fa7ea90","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":6,"title":"Bring me a snack","work_id":"4c89202a-9ba1-4304-84f9-bec8a95a16f1","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":8,"title":"8 additional training scenarios ...] Rules for this conversation:","work_id":"f1e4f0eb-b615-4898-a83d-841a9960507b","year":null}],"snapshot_sha256":"cfe86e0159343b260086008c09f7f76f6443ef0f184e8aaa66c997817e23a084"},"source":{"id":"2605.12682","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T20:21:03.374271Z","id":"5943deb5-c482-46ab-8b64-222f52b3d008","model_set":{"reader":"grok-4.3"},"one_line_summary":"CLIPR learns transferable natural language rules for latent user preferences from minimal conversational input to improve LLM alignment in decision making and outperforms prior methods on three datasets plus a user study.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"CLIPR learns transferable natural language rules from minimal conversations to align LLM decisions with latent user preferences.","strongest_claim":"We introduce CLIPR ... a framework that learns actionable, transferable natural language rules that represent latent user preferences from minimal conversational input. These rules are iteratively refined through adaptive feedback and applied to both in-distribution and out-of-distribution ambiguous tasks across multiple environments. Evaluations on three datasets and a user study show that CLIPR consistently outperforms existing methods in improving alignment and reducing inference costs.","weakest_assumption":"That the natural language rules extracted from limited conversations are sufficiently transferable and actionable to guide downstream decision making across in- and out-of-distribution tasks without introducing new misalignment or requiring extensive validation."}},"verdict_id":"5943deb5-c482-46ab-8b64-222f52b3d008"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:80b309559ca1e9aecc8c8d6e3a17d0fdd6b03a02c334923bacc57f7c58cf20c2","target":"record","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ce38683c9821fecc4e2c6f4a527cb6bfeb40ad2037bf24756058168346ddcb78","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T19:32:10Z","title_canon_sha256":"ced1e3e3b5d684046218a072f3657eb550a60cd2c52d1fd2291f19383b884340"},"schema_version":"1.0","source":{"id":"2605.12682","kind":"arxiv","version":1}},"canonical_sha256":"e158f6f5beae7c55dfed2efeaacdec80a46b15c1022bd734bddfb03da611998a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e158f6f5beae7c55dfed2efeaacdec80a46b15c1022bd734bddfb03da611998a","first_computed_at":"2026-05-18T03:09:49.985123Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:49.985123Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"U3SvjnK3t+hPYV1E7ZdWQ/pr8E8SN46ErNTWkiZmbYfAVzxfVmOv2Scqg1rgV9GGYARtRB4oiXLoaWoaAmVmCw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:49.985773Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12682","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:80b309559ca1e9aecc8c8d6e3a17d0fdd6b03a02c334923bacc57f7c58cf20c2","sha256:de5dea35e9c6e8d76099a488558f04eb9c43a3fdaa557800f27dd30b908faac8"],"state_sha256":"925739115573778a854126a200109da3e0055acbcf13b6372d1ab2783f36f1ed"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ccyHk+mcnAguB2xzDiq4HsPMiXhcIWvwlJ3Yj2Gt3e6RQNwaTKnZPKDIAdWResWaRE3VmlYW0OJHdRCNTVnGCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-26T15:12:49.846084Z","bundle_sha256":"38cbddea22c95f8a47848908b125293499bdac67c15ae4a97cd869fda7c57c16"}}