{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:R2BV3YFUXUDAPXO4OO27W7NHUU","short_pith_number":"pith:R2BV3YFU","schema_version":"1.0","canonical_sha256":"8e835de0b4bd0607dddc73b5fb7da7a52aa38d5e0a786e655591110a949328b1","source":{"kind":"arxiv","id":"1606.02960","version":2},"attestation_state":"computed","paper":{"title":"Sequence-to-Sequence Learning as Beam-Search Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE","stat.ML"],"primary_cat":"cs.CL","authors_text":"Alexander M. Rush, Sam Wiseman","submitted_at":"2016-06-09T13:29:34Z","abstract_excerpt":"Sequence-to-Sequence (seq2seq) modeling has rapidly become an important general-purpose NLP tool that has proven effective for many text-generation and sequence-labeling tasks. Seq2seq builds on deep neural language modeling and inherits its remarkable accuracy in estimating local, next-word distributions. In this work, we introduce a model and beam-search training scheme, based on the work of Daume III and Marcu (2005), that extends seq2seq to learn global sequence scores. This structured approach avoids classical biases associated with local training and unifies the training loss with the te"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1606.02960","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-06-09T13:29:34Z","cross_cats_sorted":["cs.LG","cs.NE","stat.ML"],"title_canon_sha256":"a09261f425f7a22bec3cf55e86ef4d81dc6933beff7e567e92c5bf94e2dffdd6","abstract_canon_sha256":"a275c437d6a189afb954f6196102ac61a9a2e288a79a21fb4d53781319370073"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:59:41.921534Z","signature_b64":"wfuDHfomSeNdM9n51xlY4QkcCmYasClq8OpDWR3n/O3Q3rrL74/lau2Y7wQfZlU8OzkhEOq4ST9rfRLDqty6AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8e835de0b4bd0607dddc73b5fb7da7a52aa38d5e0a786e655591110a949328b1","last_reissued_at":"2026-05-18T00:59:41.920974Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:59:41.920974Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Sequence-to-Sequence Learning as Beam-Search Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE","stat.ML"],"primary_cat":"cs.CL","authors_text":"Alexander M. Rush, Sam Wiseman","submitted_at":"2016-06-09T13:29:34Z","abstract_excerpt":"Sequence-to-Sequence (seq2seq) modeling has rapidly become an important general-purpose NLP tool that has proven effective for many text-generation and sequence-labeling tasks. Seq2seq builds on deep neural language modeling and inherits its remarkable accuracy in estimating local, next-word distributions. In this work, we introduce a model and beam-search training scheme, based on the work of Daume III and Marcu (2005), that extends seq2seq to learn global sequence scores. This structured approach avoids classical biases associated with local training and unifies the training loss with the te"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.02960","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1606.02960","created_at":"2026-05-18T00:59:41.921063+00:00"},{"alias_kind":"arxiv_version","alias_value":"1606.02960v2","created_at":"2026-05-18T00:59:41.921063+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.02960","created_at":"2026-05-18T00:59:41.921063+00:00"},{"alias_kind":"pith_short_12","alias_value":"R2BV3YFUXUDA","created_at":"2026-05-18T12:30:41.710351+00:00"},{"alias_kind":"pith_short_16","alias_value":"R2BV3YFUXUDAPXO4","created_at":"2026-05-18T12:30:41.710351+00:00"},{"alias_kind":"pith_short_8","alias_value":"R2BV3YFU","created_at":"2026-05-18T12:30:41.710351+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2601.21619","citing_title":"On the Overscaling Curse of Parallel Thinking: System Efficacy Contradicts Sample Efficiency","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10172","citing_title":"V-ABS: Action-Observer Driven Beam Search for Dynamic Visual Reasoning","ref_index":20,"is_internal_anchor":false},{"citing_arxiv_id":"2605.09942","citing_title":"HAGE: Harnessing Agentic Memory via RL-Driven Weighted Graph Evolution","ref_index":54,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU","json":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU.json","graph_json":"https://pith.science/api/pith-number/R2BV3YFUXUDAPXO4OO27W7NHUU/graph.json","events_json":"https://pith.science/api/pith-number/R2BV3YFUXUDAPXO4OO27W7NHUU/events.json","paper":"https://pith.science/paper/R2BV3YFU"},"agent_actions":{"view_html":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU","download_json":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU.json","view_paper":"https://pith.science/paper/R2BV3YFU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1606.02960&json=true","fetch_graph":"https://pith.science/api/pith-number/R2BV3YFUXUDAPXO4OO27W7NHUU/graph.json","fetch_events":"https://pith.science/api/pith-number/R2BV3YFUXUDAPXO4OO27W7NHUU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU/action/storage_attestation","attest_author":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU/action/author_attestation","sign_citation":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU/action/citation_signature","submit_replication":"https://pith.science/pith/R2BV3YFUXUDAPXO4OO27W7NHUU/action/replication_record"}},"created_at":"2026-05-18T00:59:41.921063+00:00","updated_at":"2026-05-18T00:59:41.921063+00:00"}