{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:TFSRN2UZFC6QLCIEBHX3LKNM6F","short_pith_number":"pith:TFSRN2UZ","schema_version":"1.0","canonical_sha256":"996516ea9928bd05890409efb5a9acf16a1230009c2fb328d4a625e71aa09b7b","source":{"kind":"arxiv","id":"2602.09533","version":2},"attestation_state":"computed","paper":{"title":"Autoregressive Direct Preference Optimization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Mahiro Ukai, Masahiro Kaneko, Masanari Oi, Nakamasa Inoue, Naoaki Okazaki","submitted_at":"2026-02-10T08:45:30Z","abstract_excerpt":"Direct preference optimization (DPO) has emerged as a promising approach for aligning large language models (LLMs) with human preferences. However, the widespread reliance on the response-level Bradley-Terry (BT) model may limit its full potential, as the reference and learnable models are assumed to be autoregressive only after deriving the objective function. Motivated by this limitation, we revisit the theoretical foundations of DPO and propose a novel formulation that explicitly introduces the autoregressive assumption prior to applying the BT model. By reformulating and extending DPO, we "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.09533","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-02-10T08:45:30Z","cross_cats_sorted":[],"title_canon_sha256":"4cddc76ac4702789ca02aee3a2f5d7bc61fc582b896f418a849c1252f1c3aeb5","abstract_canon_sha256":"9c1bb2c5c2257661883549c29bc815729d47c7d333e0fdccfeb2d388de7bc21f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T01:09:30.286166Z","signature_b64":"QgXyLkyi7pP6kCgPizkPaNY+L+X6mapSz2k7J3LFkYP/kdu0Y+RUxXY0umFneohC/xZXaCqopOHBezsrMVYlAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"996516ea9928bd05890409efb5a9acf16a1230009c2fb328d4a625e71aa09b7b","last_reissued_at":"2026-06-11T01:09:30.285026Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T01:09:30.285026Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Autoregressive Direct Preference Optimization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Mahiro Ukai, Masahiro Kaneko, Masanari Oi, Nakamasa Inoue, Naoaki Okazaki","submitted_at":"2026-02-10T08:45:30Z","abstract_excerpt":"Direct preference optimization (DPO) has emerged as a promising approach for aligning large language models (LLMs) with human preferences. However, the widespread reliance on the response-level Bradley-Terry (BT) model may limit its full potential, as the reference and learnable models are assumed to be autoregressive only after deriving the objective function. Motivated by this limitation, we revisit the theoretical foundations of DPO and propose a novel formulation that explicitly introduces the autoregressive assumption prior to applying the BT model. By reformulating and extending DPO, we "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.09533","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.09533/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.09533","created_at":"2026-06-11T01:09:30.285163+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.09533v2","created_at":"2026-06-11T01:09:30.285163+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.09533","created_at":"2026-06-11T01:09:30.285163+00:00"},{"alias_kind":"pith_short_12","alias_value":"TFSRN2UZFC6Q","created_at":"2026-06-11T01:09:30.285163+00:00"},{"alias_kind":"pith_short_16","alias_value":"TFSRN2UZFC6QLCIE","created_at":"2026-06-11T01:09:30.285163+00:00"},{"alias_kind":"pith_short_8","alias_value":"TFSRN2UZ","created_at":"2026-06-11T01:09:30.285163+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F","json":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F.json","graph_json":"https://pith.science/api/pith-number/TFSRN2UZFC6QLCIEBHX3LKNM6F/graph.json","events_json":"https://pith.science/api/pith-number/TFSRN2UZFC6QLCIEBHX3LKNM6F/events.json","paper":"https://pith.science/paper/TFSRN2UZ"},"agent_actions":{"view_html":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F","download_json":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F.json","view_paper":"https://pith.science/paper/TFSRN2UZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.09533&json=true","fetch_graph":"https://pith.science/api/pith-number/TFSRN2UZFC6QLCIEBHX3LKNM6F/graph.json","fetch_events":"https://pith.science/api/pith-number/TFSRN2UZFC6QLCIEBHX3LKNM6F/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F/action/storage_attestation","attest_author":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F/action/author_attestation","sign_citation":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F/action/citation_signature","submit_replication":"https://pith.science/pith/TFSRN2UZFC6QLCIEBHX3LKNM6F/action/replication_record"}},"created_at":"2026-06-11T01:09:30.285163+00:00","updated_at":"2026-06-11T01:09:30.285163+00:00"}