{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:AVFLXBK63PYS6MAILJKFZY7PPG","short_pith_number":"pith:AVFLXBK6","schema_version":"1.0","canonical_sha256":"054abb855edbf12f30085a545ce3ef79b059406d4869e6ea56db013f007e1708","source":{"kind":"arxiv","id":"1707.05589","version":2},"attestation_state":"computed","paper":{"title":"On the State of the Art of Evaluation in Neural Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chris Dyer, G\\'abor Melis, Phil Blunsom","submitted_at":"2017-07-18T12:35:53Z","abstract_excerpt":"Ongoing innovations in recurrent neural network architectures have provided a steady influx of apparently state-of-the-art results on language modelling benchmarks. However, these have been evaluated using differing code bases and limited computational resources, which represent uncontrolled sources of experimental variation. We reevaluate several popular architectures and regularisation methods with large-scale automatic black-box hyperparameter tuning and arrive at the somewhat surprising conclusion that standard LSTM architectures, when properly regularised, outperform more recent models. W"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1707.05589","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-18T12:35:53Z","cross_cats_sorted":[],"title_canon_sha256":"9e0b8a25a73b3d6f7acc8f9bbd0d4927c2e428a25515bd510592df34fab505ce","abstract_canon_sha256":"1cca659b5c247cb75fccaa1e93419bb673a404af1694020ec9fdd64b5f1b9368"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:30:11.313906Z","signature_b64":"68YjVF7V/Yfzc/WdRyPCr8OiB9hliYc6dmUloKQwN7WB9Zm9PZp3pI6IJM1lSHLT0MMlgnZuLKkCYEUiDDo4Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"054abb855edbf12f30085a545ce3ef79b059406d4869e6ea56db013f007e1708","last_reissued_at":"2026-05-18T00:30:11.313368Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:30:11.313368Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"On the State of the Art of Evaluation in Neural Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chris Dyer, G\\'abor Melis, Phil Blunsom","submitted_at":"2017-07-18T12:35:53Z","abstract_excerpt":"Ongoing innovations in recurrent neural network architectures have provided a steady influx of apparently state-of-the-art results on language modelling benchmarks. However, these have been evaluated using differing code bases and limited computational resources, which represent uncontrolled sources of experimental variation. We reevaluate several popular architectures and regularisation methods with large-scale automatic black-box hyperparameter tuning and arrive at the somewhat surprising conclusion that standard LSTM architectures, when properly regularised, outperform more recent models. W"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.05589","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1707.05589","created_at":"2026-05-18T00:30:11.313452+00:00"},{"alias_kind":"arxiv_version","alias_value":"1707.05589v2","created_at":"2026-05-18T00:30:11.313452+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.05589","created_at":"2026-05-18T00:30:11.313452+00:00"},{"alias_kind":"pith_short_12","alias_value":"AVFLXBK63PYS","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_16","alias_value":"AVFLXBK63PYS6MAI","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_8","alias_value":"AVFLXBK6","created_at":"2026-05-18T12:31:08.081275+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"1907.01463","citing_title":"Reproducibility in Machine Learning for Health","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2502.20349","citing_title":"Naturalistic Computational Cognitive Science: Towards generalizable models and theories that capture the full range of natural behavior","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2502.20349","citing_title":"Naturalistic Computational Cognitive Science: Towards generalizable models and theories that capture the full range of natural behavior","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2508.06974","citing_title":"Rethinking 1-bit Optimization Leveraging Pre-trained Large Language Models","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"1911.05507","citing_title":"Compressive Transformers for Long-Range Sequence Modelling","ref_index":98,"is_internal_anchor":true},{"citing_arxiv_id":"2304.05376","citing_title":"ChemCrow: Augmenting large-language models with chemistry tools","ref_index":107,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG","json":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG.json","graph_json":"https://pith.science/api/pith-number/AVFLXBK63PYS6MAILJKFZY7PPG/graph.json","events_json":"https://pith.science/api/pith-number/AVFLXBK63PYS6MAILJKFZY7PPG/events.json","paper":"https://pith.science/paper/AVFLXBK6"},"agent_actions":{"view_html":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG","download_json":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG.json","view_paper":"https://pith.science/paper/AVFLXBK6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1707.05589&json=true","fetch_graph":"https://pith.science/api/pith-number/AVFLXBK63PYS6MAILJKFZY7PPG/graph.json","fetch_events":"https://pith.science/api/pith-number/AVFLXBK63PYS6MAILJKFZY7PPG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG/action/storage_attestation","attest_author":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG/action/author_attestation","sign_citation":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG/action/citation_signature","submit_replication":"https://pith.science/pith/AVFLXBK63PYS6MAILJKFZY7PPG/action/replication_record"}},"created_at":"2026-05-18T00:30:11.313452+00:00","updated_at":"2026-05-18T00:30:11.313452+00:00"}