{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:KJHUGTUKCH5WBX5XHGMT7YA4UM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0ef18e4e3dc28985e5e79828a6ea30a2d1745b5c1328262cdee8cc049d38bfde","cross_cats_sorted":["cond-mat.dis-nn","cs.CL","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-01-31T17:35:06Z","title_canon_sha256":"047fd8ba62904ed39763304aa146d718d5c503400e75dc714696a43222351e39"},"schema_version":"1.0","source":{"id":"2602.06065","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.06065","created_at":"2026-06-02T03:04:38Z"},{"alias_kind":"arxiv_version","alias_value":"2602.06065v3","created_at":"2026-06-02T03:04:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.06065","created_at":"2026-06-02T03:04:38Z"},{"alias_kind":"pith_short_12","alias_value":"KJHUGTUKCH5W","created_at":"2026-06-02T03:04:38Z"},{"alias_kind":"pith_short_16","alias_value":"KJHUGTUKCH5WBX5X","created_at":"2026-06-02T03:04:38Z"},{"alias_kind":"pith_short_8","alias_value":"KJHUGTUK","created_at":"2026-06-02T03:04:38Z"}],"graph_snapshots":[{"event_id":"sha256:16182382df540c32e6d70f5255e9437b879a8bf4172e5f9f45d1b6c21ba01e1c","target":"graph","created_at":"2026-06-02T03:04:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.06065/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Understanding how the structure of language can be learned from sentences alone is a central question in both cognitive science and machine learning. Studies of the internal representations of Large Language Models (LLMs) support their ability to parse text when predicting the next word, while representing semantic notions independently of surface form. Yet, which data statistics make these feats possible, and how much data is required, remain largely unknown. Probabilistic context-free grammars (PCFGs) provide a tractable testbed for studying these questions. However, prior work has focused e","authors_text":"Francesco Cagnetta, Jack T. Parley, Matthieu Wyart","cross_cats":["cond-mat.dis-nn","cs.CL","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-01-31T17:35:06Z","title":"Deep networks learn to parse uniform-depth context-free languages from local statistics"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.06065","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a33fcf9a16194bd763c1ba6bac147490ec4b462c267d47add397ab9f64826da0","target":"record","created_at":"2026-06-02T03:04:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0ef18e4e3dc28985e5e79828a6ea30a2d1745b5c1328262cdee8cc049d38bfde","cross_cats_sorted":["cond-mat.dis-nn","cs.CL","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2026-01-31T17:35:06Z","title_canon_sha256":"047fd8ba62904ed39763304aa146d718d5c503400e75dc714696a43222351e39"},"schema_version":"1.0","source":{"id":"2602.06065","kind":"arxiv","version":3}},"canonical_sha256":"524f434e8a11fb60dfb739993fe01ca33551e87f6cf4da9118282c606be906a8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"524f434e8a11fb60dfb739993fe01ca33551e87f6cf4da9118282c606be906a8","first_computed_at":"2026-06-02T03:04:38.153665Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T03:04:38.153665Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"uaW5gQmBbnwEe+vS8C7TWHqzB7qWwO7t7X7uyhtV56QtwshhOYa3ZU+NrUMKTY+BPfMji83q/mJCt/EvEUT+Cw==","signature_status":"signed_v1","signed_at":"2026-06-02T03:04:38.154180Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.06065","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a33fcf9a16194bd763c1ba6bac147490ec4b462c267d47add397ab9f64826da0","sha256:16182382df540c32e6d70f5255e9437b879a8bf4172e5f9f45d1b6c21ba01e1c"],"state_sha256":"18ae3dc2f6208d6b990c718092cb73969592d14611bbc4125c16505e76be2968"}