{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:6LWUZD2DITQIXYSHOTHNIRJND7","short_pith_number":"pith:6LWUZD2D","schema_version":"1.0","canonical_sha256":"f2ed4c8f4344e08be24774ced4452d1fedb3999f71ce99fe16bddf298331c33a","source":{"kind":"arxiv","id":"1506.03099","version":3},"attestation_state":"computed","paper":{"title":"Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.CV"],"primary_cat":"cs.LG","authors_text":"Navdeep Jaitly, Noam Shazeer, Oriol Vinyals, Samy Bengio","submitted_at":"2015-06-09T20:33:47Z","abstract_excerpt":"Recurrent Neural Networks can be trained to produce sequences of tokens given some input, as exemplified by recent results in machine translation and image captioning. The current approach to training them consists of maximizing the likelihood of each token in the sequence given the current (recurrent) state and the previous token. At inference, the unknown previous token is then replaced by a token generated by the model itself. This discrepancy between training and inference can yield errors that can accumulate quickly along the generated sequence. We propose a curriculum learning strategy t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1506.03099","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-06-09T20:33:47Z","cross_cats_sorted":["cs.CL","cs.CV"],"title_canon_sha256":"c5407e528fa3813ff97076099096031b85f6edc86e45adec72eb152a365c36f5","abstract_canon_sha256":"2d0c51582414cf253d669689748a2dad97d6c107366cac53b6d992cd0c02fcf9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:32:18.444486Z","signature_b64":"Vm5TW41yFFfSypIvAXlil7RU750ZQkFBmJI9nDStD059Er3QNgd9aRrSPYGU2x+ZEy0aKTNZ2rljufmdBSpdAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f2ed4c8f4344e08be24774ced4452d1fedb3999f71ce99fe16bddf298331c33a","last_reissued_at":"2026-05-18T01:32:18.443688Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:32:18.443688Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.CV"],"primary_cat":"cs.LG","authors_text":"Navdeep Jaitly, Noam Shazeer, Oriol Vinyals, Samy Bengio","submitted_at":"2015-06-09T20:33:47Z","abstract_excerpt":"Recurrent Neural Networks can be trained to produce sequences of tokens given some input, as exemplified by recent results in machine translation and image captioning. The current approach to training them consists of maximizing the likelihood of each token in the sequence given the current (recurrent) state and the previous token. At inference, the unknown previous token is then replaced by a token generated by the model itself. This discrepancy between training and inference can yield errors that can accumulate quickly along the generated sequence. We propose a curriculum learning strategy t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1506.03099","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1506.03099","created_at":"2026-05-18T01:32:18.443802+00:00"},{"alias_kind":"arxiv_version","alias_value":"1506.03099v3","created_at":"2026-05-18T01:32:18.443802+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1506.03099","created_at":"2026-05-18T01:32:18.443802+00:00"},{"alias_kind":"pith_short_12","alias_value":"6LWUZD2DITQI","created_at":"2026-05-18T12:29:07.941421+00:00"},{"alias_kind":"pith_short_16","alias_value":"6LWUZD2DITQIXYSH","created_at":"2026-05-18T12:29:07.941421+00:00"},{"alias_kind":"pith_short_8","alias_value":"6LWUZD2D","created_at":"2026-05-18T12:29:07.941421+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"1907.07769","citing_title":"Hierarchical Sequence to Sequence Voice Conversion with Limited Data","ref_index":51,"is_internal_anchor":true},{"citing_arxiv_id":"2512.24497","citing_title":"What Drives Success in Physical Planning with Joint-Embedding Predictive World Models?","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07292","citing_title":"Graph Neural ODE Digital Twins for Control-Oriented Reactor Thermal-Hydraulic Forecasting Under Partial Observability","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11856","citing_title":"UniVLR: Unifying Text and Vision in Visual Latent Reasoning for Multimodal LLMs","ref_index":39,"is_internal_anchor":false},{"citing_arxiv_id":"2605.07955","citing_title":"TimeLesSeg: Unified Contrast-Agnostic Cross-Sectional and Longitudinal MS Lesion Segmentation via a Stochastic Generative Model","ref_index":1,"is_internal_anchor":false},{"citing_arxiv_id":"2604.07292","citing_title":"Graph Neural ODE Digital Twins for Control-Oriented Reactor Thermal-Hydraulic Forecasting Under Partial Observability","ref_index":37,"is_internal_anchor":false},{"citing_arxiv_id":"2604.06475","citing_title":"AE-ViT: Stable Long-Horizon Parametric Partial Differential Equations Modeling","ref_index":8,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7","json":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7.json","graph_json":"https://pith.science/api/pith-number/6LWUZD2DITQIXYSHOTHNIRJND7/graph.json","events_json":"https://pith.science/api/pith-number/6LWUZD2DITQIXYSHOTHNIRJND7/events.json","paper":"https://pith.science/paper/6LWUZD2D"},"agent_actions":{"view_html":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7","download_json":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7.json","view_paper":"https://pith.science/paper/6LWUZD2D","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1506.03099&json=true","fetch_graph":"https://pith.science/api/pith-number/6LWUZD2DITQIXYSHOTHNIRJND7/graph.json","fetch_events":"https://pith.science/api/pith-number/6LWUZD2DITQIXYSHOTHNIRJND7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7/action/storage_attestation","attest_author":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7/action/author_attestation","sign_citation":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7/action/citation_signature","submit_replication":"https://pith.science/pith/6LWUZD2DITQIXYSHOTHNIRJND7/action/replication_record"}},"created_at":"2026-05-18T01:32:18.443802+00:00","updated_at":"2026-05-18T01:32:18.443802+00:00"}