{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:2GZ46PQKT24RSRQKHHXGKOD5Q6","short_pith_number":"pith:2GZ46PQK","canonical_record":{"source":{"id":"2606.19626","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-17T22:06:41Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"6b2faf4f8d85e89964501a3eecd8e11228496ef2b882190b2b0dc9f9409435b4","abstract_canon_sha256":"6e310cb8fc44cc6a5e53bb187b37681223305daa72ba727fb20524a90f524e0e"},"schema_version":"1.0"},"canonical_sha256":"d1b3cf3e0a9eb919460a39ee65387d87be312198fa254731ef48933d4d57240f","source":{"kind":"arxiv","id":"2606.19626","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.19626","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"arxiv_version","alias_value":"2606.19626v1","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19626","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"pith_short_12","alias_value":"2GZ46PQKT24R","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"pith_short_16","alias_value":"2GZ46PQKT24RSRQK","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"pith_short_8","alias_value":"2GZ46PQK","created_at":"2026-06-19T16:12:30Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:2GZ46PQKT24RSRQKHHXGKOD5Q6","target":"record","payload":{"canonical_record":{"source":{"id":"2606.19626","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-17T22:06:41Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"6b2faf4f8d85e89964501a3eecd8e11228496ef2b882190b2b0dc9f9409435b4","abstract_canon_sha256":"6e310cb8fc44cc6a5e53bb187b37681223305daa72ba727fb20524a90f524e0e"},"schema_version":"1.0"},"canonical_sha256":"d1b3cf3e0a9eb919460a39ee65387d87be312198fa254731ef48933d4d57240f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:30.648808Z","signature_b64":"F8dEPAbIR9V+U6DyS/7hx76lhpAZCavotg2xWDQ7Keg0HQrNGpNlqIf3dDKzXFdhxoO3LV3c5LGFwMGLR8XoBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d1b3cf3e0a9eb919460a39ee65387d87be312198fa254731ef48933d4d57240f","last_reissued_at":"2026-06-19T16:12:30.648424Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:30.648424Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.19626","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:12:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zDSVV8gWUjWFsERPr4f+xV6YcUYObiwKrgp6jOnEX3l9hIhCwJrPipqIOLm5o1EnUHuxynBH2Nk7pYXKT2kXCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T09:24:39.613583Z"},"content_sha256":"158e3fe8aa5286f839bd7d286634936e8e95f8fee7c0cc5b38057e33cd4de133","schema_version":"1.0","event_id":"sha256:158e3fe8aa5286f839bd7d286634936e8e95f8fee7c0cc5b38057e33cd4de133"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:2GZ46PQKT24RSRQKHHXGKOD5Q6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Toten: Knowledge-Based Ontological Tokenization Of Physical Quantities And Technical Notation In Brazilian Portuguese","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.AI","authors_text":"Antonio de Sousa Leit\\~ao Filho; Allan Kardec Duailibe Barros Filho; Fabr\\'icio Saul Lima; Selby Mykael Lima dos Santos; Rejani Bandeira Vieira Sousa","submitted_at":"2026-06-17T22:06:41Z","abstract_excerpt":"Byte-Pair Encoding tokenization is statistically efficient for vocabulary compression, but semantically blind to structured technical entities, fragmenting physical quantities, numbers, units, and symbolic expressions into lexically arbitrary subwords. We present TOTEN, a knowledge-based ontological tokenization framework that replaces statistical derivation with declarative classification grounded in a formal ontology of engineering entities (OEE). We formalize TOTEN as the triple <O, classify, {inst_tau}>: the ontology gathers types, structural principles, composition relations, and preserva"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19626","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.19626/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:12:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JNMSsiTrqL3GEx8Fab3e2yXRU/tRmWYE8/ytNRbp5SM7T5nj/PxQImvWdj17RhkukfrCprYXZAFLdQtbqKntCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T09:24:39.613953Z"},"content_sha256":"c51af97c461f629b9911287cd3c0f55575b7cb853c970dd90567acf1191e2487","schema_version":"1.0","event_id":"sha256:c51af97c461f629b9911287cd3c0f55575b7cb853c970dd90567acf1191e2487"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2GZ46PQKT24RSRQKHHXGKOD5Q6/bundle.json","state_url":"https://pith.science/pith/2GZ46PQKT24RSRQKHHXGKOD5Q6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2GZ46PQKT24RSRQKHHXGKOD5Q6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T09:24:39Z","links":{"resolver":"https://pith.science/pith/2GZ46PQKT24RSRQKHHXGKOD5Q6","bundle":"https://pith.science/pith/2GZ46PQKT24RSRQKHHXGKOD5Q6/bundle.json","state":"https://pith.science/pith/2GZ46PQKT24RSRQKHHXGKOD5Q6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2GZ46PQKT24RSRQKHHXGKOD5Q6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:2GZ46PQKT24RSRQKHHXGKOD5Q6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6e310cb8fc44cc6a5e53bb187b37681223305daa72ba727fb20524a90f524e0e","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-17T22:06:41Z","title_canon_sha256":"6b2faf4f8d85e89964501a3eecd8e11228496ef2b882190b2b0dc9f9409435b4"},"schema_version":"1.0","source":{"id":"2606.19626","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.19626","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"arxiv_version","alias_value":"2606.19626v1","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19626","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"pith_short_12","alias_value":"2GZ46PQKT24R","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"pith_short_16","alias_value":"2GZ46PQKT24RSRQK","created_at":"2026-06-19T16:12:30Z"},{"alias_kind":"pith_short_8","alias_value":"2GZ46PQK","created_at":"2026-06-19T16:12:30Z"}],"graph_snapshots":[{"event_id":"sha256:c51af97c461f629b9911287cd3c0f55575b7cb853c970dd90567acf1191e2487","target":"graph","created_at":"2026-06-19T16:12:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.19626/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Byte-Pair Encoding tokenization is statistically efficient for vocabulary compression, but semantically blind to structured technical entities, fragmenting physical quantities, numbers, units, and symbolic expressions into lexically arbitrary subwords. We present TOTEN, a knowledge-based ontological tokenization framework that replaces statistical derivation with declarative classification grounded in a formal ontology of engineering entities (OEE). We formalize TOTEN as the triple <O, classify, {inst_tau}>: the ontology gathers types, structural principles, composition relations, and preserva","authors_text":"Antonio de Sousa Leit\\~ao Filho; Allan Kardec Duailibe Barros Filho; Fabr\\'icio Saul Lima; Selby Mykael Lima dos Santos; Rejani Bandeira Vieira Sousa","cross_cats":["cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-17T22:06:41Z","title":"Toten: Knowledge-Based Ontological Tokenization Of Physical Quantities And Technical Notation In Brazilian Portuguese"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19626","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:158e3fe8aa5286f839bd7d286634936e8e95f8fee7c0cc5b38057e33cd4de133","target":"record","created_at":"2026-06-19T16:12:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6e310cb8fc44cc6a5e53bb187b37681223305daa72ba727fb20524a90f524e0e","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-17T22:06:41Z","title_canon_sha256":"6b2faf4f8d85e89964501a3eecd8e11228496ef2b882190b2b0dc9f9409435b4"},"schema_version":"1.0","source":{"id":"2606.19626","kind":"arxiv","version":1}},"canonical_sha256":"d1b3cf3e0a9eb919460a39ee65387d87be312198fa254731ef48933d4d57240f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d1b3cf3e0a9eb919460a39ee65387d87be312198fa254731ef48933d4d57240f","first_computed_at":"2026-06-19T16:12:30.648424Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:12:30.648424Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"F8dEPAbIR9V+U6DyS/7hx76lhpAZCavotg2xWDQ7Keg0HQrNGpNlqIf3dDKzXFdhxoO3LV3c5LGFwMGLR8XoBA==","signature_status":"signed_v1","signed_at":"2026-06-19T16:12:30.648808Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.19626","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:158e3fe8aa5286f839bd7d286634936e8e95f8fee7c0cc5b38057e33cd4de133","sha256:c51af97c461f629b9911287cd3c0f55575b7cb853c970dd90567acf1191e2487"],"state_sha256":"f0ecfc08df6ebf1705f67cb021b9884f36986eaf556372fe78f5527f32773c0f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Kwxwxnk7k+namxG90Ak8SdcRVLaycCP9aWYxzSiTFqyqDBzGTZjkEb9Ot6756M79SohIQIZTezx8PrTlA29EDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T09:24:39.616075Z","bundle_sha256":"0c9397cd2e4c8e54c61f8819df54a4027be63bc755d5cbc54cead548d2d6c3e6"}}