{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:HNVDOVBO3OV2Q2OROTMUANYC3X","short_pith_number":"pith:HNVDOVBO","schema_version":"1.0","canonical_sha256":"3b6a37542edbaba869d174d9403702ddeaf8da707d4518bebab41ee7ae9e796b","source":{"kind":"arxiv","id":"1504.00941","version":2},"attestation_state":"computed","paper":{"title":"A Simple Way to Initialize Recurrent Networks of Rectified Linear Units","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.NE","authors_text":"Geoffrey E. Hinton, Navdeep Jaitly, Quoc V. Le","submitted_at":"2015-04-03T21:22:52Z","abstract_excerpt":"Learning long term dependencies in recurrent networks is difficult due to vanishing and exploding gradients. To overcome this difficulty, researchers have developed sophisticated optimization techniques and network architectures. In this paper, we propose a simpler solution that use recurrent neural networks composed of rectified linear units. Key to our solution is the use of the identity matrix or its scaled version to initialize the recurrent weight matrix. We find that our solution is comparable to LSTM on our four benchmarks: two toy problems involving long-range temporal structures, a la"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1504.00941","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.NE","submitted_at":"2015-04-03T21:22:52Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"a617aae8b489c6c000ed0db921dc1aa91c42caccf1d5154649f250a7306328f5","abstract_canon_sha256":"2198a0dc7cb1127f773041cd0ff6e5d00f1effa7833507fd01364c811dc237ef"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:19:26.186245Z","signature_b64":"PQYKm/kBtLlr1WGwM22kvfWslnmJ2+mw8+oPNPeQwWgo1dNRlTBUW4BIlBi9rTPDzquGtt2dkCFoQ10EGaDOCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3b6a37542edbaba869d174d9403702ddeaf8da707d4518bebab41ee7ae9e796b","last_reissued_at":"2026-05-18T02:19:26.185406Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:19:26.185406Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Simple Way to Initialize Recurrent Networks of Rectified Linear Units","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.NE","authors_text":"Geoffrey E. Hinton, Navdeep Jaitly, Quoc V. Le","submitted_at":"2015-04-03T21:22:52Z","abstract_excerpt":"Learning long term dependencies in recurrent networks is difficult due to vanishing and exploding gradients. To overcome this difficulty, researchers have developed sophisticated optimization techniques and network architectures. In this paper, we propose a simpler solution that use recurrent neural networks composed of rectified linear units. Key to our solution is the use of the identity matrix or its scaled version to initialize the recurrent weight matrix. We find that our solution is comparable to LSTM on our four benchmarks: two toy problems involving long-range temporal structures, a la"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1504.00941","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1504.00941","created_at":"2026-05-18T02:19:26.185565+00:00"},{"alias_kind":"arxiv_version","alias_value":"1504.00941v2","created_at":"2026-05-18T02:19:26.185565+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1504.00941","created_at":"2026-05-18T02:19:26.185565+00:00"},{"alias_kind":"pith_short_12","alias_value":"HNVDOVBO3OV2","created_at":"2026-05-18T12:29:25.134429+00:00"},{"alias_kind":"pith_short_16","alias_value":"HNVDOVBO3OV2Q2OR","created_at":"2026-05-18T12:29:25.134429+00:00"},{"alias_kind":"pith_short_8","alias_value":"HNVDOVBO","created_at":"2026-05-18T12:29:25.134429+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"1907.00664","citing_title":"Learning World Graphs to Accelerate Hierarchical Reinforcement Learning","ref_index":55,"is_internal_anchor":true},{"citing_arxiv_id":"1907.05572","citing_title":"R-Transformer: Recurrent Neural Network Enhanced Transformer","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16067","citing_title":"SAFE Quantum Machine Learning with Variational Quantum Classifiers","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2603.14360","citing_title":"M$^2$RNN: Non-Linear RNNs with Matrix-Valued States for Scalable Language Modeling","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01656","citing_title":"From Cortical Synchronous Rhythm to Brain Inspired Learning Mechanism: An Oscillatory Spiking Neural Network with Time-Delayed Coordination","ref_index":20,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X","json":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X.json","graph_json":"https://pith.science/api/pith-number/HNVDOVBO3OV2Q2OROTMUANYC3X/graph.json","events_json":"https://pith.science/api/pith-number/HNVDOVBO3OV2Q2OROTMUANYC3X/events.json","paper":"https://pith.science/paper/HNVDOVBO"},"agent_actions":{"view_html":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X","download_json":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X.json","view_paper":"https://pith.science/paper/HNVDOVBO","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1504.00941&json=true","fetch_graph":"https://pith.science/api/pith-number/HNVDOVBO3OV2Q2OROTMUANYC3X/graph.json","fetch_events":"https://pith.science/api/pith-number/HNVDOVBO3OV2Q2OROTMUANYC3X/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X/action/storage_attestation","attest_author":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X/action/author_attestation","sign_citation":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X/action/citation_signature","submit_replication":"https://pith.science/pith/HNVDOVBO3OV2Q2OROTMUANYC3X/action/replication_record"}},"created_at":"2026-05-18T02:19:26.185565+00:00","updated_at":"2026-05-18T02:19:26.185565+00:00"}