{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:4JIX4JC5OC6QMTEXYQZIW7GQRS","short_pith_number":"pith:4JIX4JC5","canonical_record":{"source":{"id":"1811.00659","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-01T22:24:25Z","cross_cats_sorted":["cs.LG","cs.NE"],"title_canon_sha256":"362c66e256c8e9498c7d996bca1f115c6879b368ca9e0c4ea5a4bc8462c588a0","abstract_canon_sha256":"33d9547b2d40d4cd68e13a59acc6a6c7dfddfd79bc898bf4629c156881bb853f"},"schema_version":"1.0"},"canonical_sha256":"e2517e245d70bd064c97c4328b7cd08cb42f894eff9e64307ef6ae852101b5b3","source":{"kind":"arxiv","id":"1811.00659","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.00659","created_at":"2026-05-18T00:01:42Z"},{"alias_kind":"arxiv_version","alias_value":"1811.00659v1","created_at":"2026-05-18T00:01:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.00659","created_at":"2026-05-18T00:01:42Z"},{"alias_kind":"pith_short_12","alias_value":"4JIX4JC5OC6Q","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"4JIX4JC5OC6QMTEX","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"4JIX4JC5","created_at":"2026-05-18T12:32:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:4JIX4JC5OC6QMTEXYQZIW7GQRS","target":"record","payload":{"canonical_record":{"source":{"id":"1811.00659","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-01T22:24:25Z","cross_cats_sorted":["cs.LG","cs.NE"],"title_canon_sha256":"362c66e256c8e9498c7d996bca1f115c6879b368ca9e0c4ea5a4bc8462c588a0","abstract_canon_sha256":"33d9547b2d40d4cd68e13a59acc6a6c7dfddfd79bc898bf4629c156881bb853f"},"schema_version":"1.0"},"canonical_sha256":"e2517e245d70bd064c97c4328b7cd08cb42f894eff9e64307ef6ae852101b5b3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:42.553770Z","signature_b64":"bNzUfaPjbLcPvEvWR7LES163+tId+B3ZlbLFfhHHmyDbQxlwJpIeRy7qjSnPwCgavwl+3eXwUNUA2qJsHmA1CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e2517e245d70bd064c97c4328b7cd08cb42f894eff9e64307ef6ae852101b5b3","last_reissued_at":"2026-05-18T00:01:42.553311Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:42.553311Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.00659","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"E8VvSLWFwDVoN0ErfqeEpND/JTyV4At/6ziN+oCVrkEH7E2mIvV+UPFG1te2gf3sAzONTVuJgprMU1Yb9JP/Dg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T12:42:51.196665Z"},"content_sha256":"f1acaa0ab66a9efaa2ac843753784e3e528a00479557ec8c4dd367bf23b6754b","schema_version":"1.0","event_id":"sha256:f1acaa0ab66a9efaa2ac843753784e3e528a00479557ec8c4dd367bf23b6754b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:4JIX4JC5OC6QMTEXYQZIW7GQRS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Implicit Regularization of Stochastic Gradient Descent in Natural Language Processing: Observations and Implications","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE"],"primary_cat":"cs.CL","authors_text":"Deren Lei, William Yang Wang, Yijun Xiao, Zichen Sun","submitted_at":"2018-11-01T22:24:25Z","abstract_excerpt":"Deep neural networks with remarkably strong generalization performances are usually over-parameterized. Despite explicit regularization strategies are used for practitioners to avoid over-fitting, the impacts are often small. Some theoretical studies have analyzed the implicit regularization effect of stochastic gradient descent (SGD) on simple machine learning models with certain assumptions. However, how it behaves practically in state-of-the-art models and real-world datasets is still unknown. To bridge this gap, we study the role of SGD implicit regularization in deep learning systems. We "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.00659","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TPPkFgcbF5aK2V2SYkUbj9f0ZmkuXT03On9U+LDi4mntgMM44ObGJSipkEi8ns10afYCRIYmnVLjRtfh/i30Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T12:42:51.197025Z"},"content_sha256":"896e8e9bf85e7134318615fdb41e4cf3de8300c3159e25c2946bdced77b68863","schema_version":"1.0","event_id":"sha256:896e8e9bf85e7134318615fdb41e4cf3de8300c3159e25c2946bdced77b68863"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4JIX4JC5OC6QMTEXYQZIW7GQRS/bundle.json","state_url":"https://pith.science/pith/4JIX4JC5OC6QMTEXYQZIW7GQRS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4JIX4JC5OC6QMTEXYQZIW7GQRS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T12:42:51Z","links":{"resolver":"https://pith.science/pith/4JIX4JC5OC6QMTEXYQZIW7GQRS","bundle":"https://pith.science/pith/4JIX4JC5OC6QMTEXYQZIW7GQRS/bundle.json","state":"https://pith.science/pith/4JIX4JC5OC6QMTEXYQZIW7GQRS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4JIX4JC5OC6QMTEXYQZIW7GQRS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:4JIX4JC5OC6QMTEXYQZIW7GQRS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"33d9547b2d40d4cd68e13a59acc6a6c7dfddfd79bc898bf4629c156881bb853f","cross_cats_sorted":["cs.LG","cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-01T22:24:25Z","title_canon_sha256":"362c66e256c8e9498c7d996bca1f115c6879b368ca9e0c4ea5a4bc8462c588a0"},"schema_version":"1.0","source":{"id":"1811.00659","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.00659","created_at":"2026-05-18T00:01:42Z"},{"alias_kind":"arxiv_version","alias_value":"1811.00659v1","created_at":"2026-05-18T00:01:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.00659","created_at":"2026-05-18T00:01:42Z"},{"alias_kind":"pith_short_12","alias_value":"4JIX4JC5OC6Q","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_16","alias_value":"4JIX4JC5OC6QMTEX","created_at":"2026-05-18T12:32:05Z"},{"alias_kind":"pith_short_8","alias_value":"4JIX4JC5","created_at":"2026-05-18T12:32:05Z"}],"graph_snapshots":[{"event_id":"sha256:896e8e9bf85e7134318615fdb41e4cf3de8300c3159e25c2946bdced77b68863","target":"graph","created_at":"2026-05-18T00:01:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep neural networks with remarkably strong generalization performances are usually over-parameterized. Despite explicit regularization strategies are used for practitioners to avoid over-fitting, the impacts are often small. Some theoretical studies have analyzed the implicit regularization effect of stochastic gradient descent (SGD) on simple machine learning models with certain assumptions. However, how it behaves practically in state-of-the-art models and real-world datasets is still unknown. To bridge this gap, we study the role of SGD implicit regularization in deep learning systems. We ","authors_text":"Deren Lei, William Yang Wang, Yijun Xiao, Zichen Sun","cross_cats":["cs.LG","cs.NE"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-01T22:24:25Z","title":"Implicit Regularization of Stochastic Gradient Descent in Natural Language Processing: Observations and Implications"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.00659","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f1acaa0ab66a9efaa2ac843753784e3e528a00479557ec8c4dd367bf23b6754b","target":"record","created_at":"2026-05-18T00:01:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"33d9547b2d40d4cd68e13a59acc6a6c7dfddfd79bc898bf4629c156881bb853f","cross_cats_sorted":["cs.LG","cs.NE"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-01T22:24:25Z","title_canon_sha256":"362c66e256c8e9498c7d996bca1f115c6879b368ca9e0c4ea5a4bc8462c588a0"},"schema_version":"1.0","source":{"id":"1811.00659","kind":"arxiv","version":1}},"canonical_sha256":"e2517e245d70bd064c97c4328b7cd08cb42f894eff9e64307ef6ae852101b5b3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e2517e245d70bd064c97c4328b7cd08cb42f894eff9e64307ef6ae852101b5b3","first_computed_at":"2026-05-18T00:01:42.553311Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:01:42.553311Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bNzUfaPjbLcPvEvWR7LES163+tId+B3ZlbLFfhHHmyDbQxlwJpIeRy7qjSnPwCgavwl+3eXwUNUA2qJsHmA1CQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:01:42.553770Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.00659","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f1acaa0ab66a9efaa2ac843753784e3e528a00479557ec8c4dd367bf23b6754b","sha256:896e8e9bf85e7134318615fdb41e4cf3de8300c3159e25c2946bdced77b68863"],"state_sha256":"bd304270351036d567a64566248aac3eba3395f2607912d180a24c5c19b6385a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"meSOwg8KMjfrI8KDL+DNTPlQpgM42Yk4RSYz1noyS/S4P2JNeQOvLPXP26PWdeppZhGavehklXm4zgp9PCAsDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T12:42:51.199023Z","bundle_sha256":"ee8378bbbc9e57a9c42ac007ff6f5c900eedab5438e7f775090c306f240be940"}}