{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:YU45F7PKTBII4UAGZIW6G4X2R6","short_pith_number":"pith:YU45F7PK","schema_version":"1.0","canonical_sha256":"c539d2fdea98508e5006ca2de372fa8fba72a02acdf759c19d048e1ffbb5891e","source":{"kind":"arxiv","id":"1511.06807","version":1},"attestation_state":"computed","paper":{"title":"Adding Gradient Noise Improves Learning for Very Deep Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Arvind Neelakantan, Ilya Sutskever, James Martens, Karol Kurach, Lukasz Kaiser, Luke Vilnis, Quoc V. Le","submitted_at":"2015-11-21T01:11:29Z","abstract_excerpt":"Deep feedforward and recurrent networks have achieved impressive results in many perception and language processing applications. This success is partially attributed to architectural innovations such as convolutional and long short-term memory networks. The main motivation for these architectural innovations is that they capture better domain knowledge, and importantly are easier to optimize than more basic architectures. Recently, more complex architectures such as Neural Turing Machines and Memory Networks have been proposed for tasks including question answering and general computation, cr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1511.06807","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-11-21T01:11:29Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"7e1a89fb4edcf8c6463df9c0cdf49bf6ec1ac6a3ab02ec1352df8538fac068b5","abstract_canon_sha256":"f9c5029bb2bb29034baa3874b04c0bdf279698519a57b875692658554b9a2c60"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:26:19.440145Z","signature_b64":"Zh/9Mhfi8AXUicPr6WI0swdnLcLPwHmh60ksbTQ8GlIPcLsw9zIq1a7EA+7HL2fujMU+Y63F3OGIUZ5NN+QSDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c539d2fdea98508e5006ca2de372fa8fba72a02acdf759c19d048e1ffbb5891e","last_reissued_at":"2026-05-18T01:26:19.439499Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:26:19.439499Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Adding Gradient Noise Improves Learning for Very Deep Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Arvind Neelakantan, Ilya Sutskever, James Martens, Karol Kurach, Lukasz Kaiser, Luke Vilnis, Quoc V. Le","submitted_at":"2015-11-21T01:11:29Z","abstract_excerpt":"Deep feedforward and recurrent networks have achieved impressive results in many perception and language processing applications. This success is partially attributed to architectural innovations such as convolutional and long short-term memory networks. The main motivation for these architectural innovations is that they capture better domain knowledge, and importantly are easier to optimize than more basic architectures. Recently, more complex architectures such as Neural Turing Machines and Memory Networks have been proposed for tasks including question answering and general computation, cr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1511.06807","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1511.06807","created_at":"2026-05-18T01:26:19.439610+00:00"},{"alias_kind":"arxiv_version","alias_value":"1511.06807v1","created_at":"2026-05-18T01:26:19.439610+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1511.06807","created_at":"2026-05-18T01:26:19.439610+00:00"},{"alias_kind":"pith_short_12","alias_value":"YU45F7PKTBII","created_at":"2026-05-18T12:29:52.810259+00:00"},{"alias_kind":"pith_short_16","alias_value":"YU45F7PKTBII4UAG","created_at":"2026-05-18T12:29:52.810259+00:00"},{"alias_kind":"pith_short_8","alias_value":"YU45F7PK","created_at":"2026-05-18T12:29:52.810259+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":12,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"2605.20402","citing_title":"Decomposing MXFP4 quantization error for LLM reinforcement learning: reducible bias, recoverable deadzone, and an irreducible floor","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20402","citing_title":"Decomposing MXFP4 quantization error for LLM reinforcement learning: reducible bias, recoverable deadzone, and an irreducible floor","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20534","citing_title":"Axiomatizing Neural Networks via Pursuit of Subspaces","ref_index":55,"is_internal_anchor":true},{"citing_arxiv_id":"2102.01293","citing_title":"Scaling Laws for Transfer","ref_index":160,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16334","citing_title":"Preventing overfitting in deep learning using differential privacy","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2202.08906","citing_title":"ST-MoE: Designing Stable and Transferable Sparse Expert Models","ref_index":180,"is_internal_anchor":false},{"citing_arxiv_id":"2604.25550","citing_title":"Enhancing SignSGD: Small-Batch Convergence Analysis and a Hybrid Switching Strategy","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2201.02177","citing_title":"Grokking: Generalization Beyond Overfitting on Small Algorithmic Datasets","ref_index":11,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06599","citing_title":"Weight-Decay Turns Transformer Loss Landscapes Villani: Functional-Analytic Foundations for Optimization and Generalization","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2112.00861","citing_title":"A General Language Assistant as a Laboratory for Alignment","ref_index":202,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04054","citing_title":"Endogenous Regime Switching Driven by Scalar-Irreducible Learning Dynamics","ref_index":13,"is_internal_anchor":false},{"citing_arxiv_id":"2207.05221","citing_title":"Language Models (Mostly) Know What They Know","ref_index":280,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6","json":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6.json","graph_json":"https://pith.science/api/pith-number/YU45F7PKTBII4UAGZIW6G4X2R6/graph.json","events_json":"https://pith.science/api/pith-number/YU45F7PKTBII4UAGZIW6G4X2R6/events.json","paper":"https://pith.science/paper/YU45F7PK"},"agent_actions":{"view_html":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6","download_json":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6.json","view_paper":"https://pith.science/paper/YU45F7PK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1511.06807&json=true","fetch_graph":"https://pith.science/api/pith-number/YU45F7PKTBII4UAGZIW6G4X2R6/graph.json","fetch_events":"https://pith.science/api/pith-number/YU45F7PKTBII4UAGZIW6G4X2R6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6/action/storage_attestation","attest_author":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6/action/author_attestation","sign_citation":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6/action/citation_signature","submit_replication":"https://pith.science/pith/YU45F7PKTBII4UAGZIW6G4X2R6/action/replication_record"}},"created_at":"2026-05-18T01:26:19.439610+00:00","updated_at":"2026-05-18T01:26:19.439610+00:00"}