{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:GKKRPTQS5RHEDHZ33SY7SZOWDC","short_pith_number":"pith:GKKRPTQS","canonical_record":{"source":{"id":"2511.23071","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-11-28T10:58:37Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"9bac0e4876f7109f11a8e169d2540d863b5e5ed4cdd4a7693c85ee17f8ad266e","abstract_canon_sha256":"d688b4728469a6338d6e132ed1dc1d2fac2a9a374b09b9bc7024e8c9878f698b"},"schema_version":"1.0"},"canonical_sha256":"329517ce12ec4e419f3bdcb1f965d6188f622cbc16c6c61b81246484ce23447a","source":{"kind":"arxiv","id":"2511.23071","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.23071","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"arxiv_version","alias_value":"2511.23071v2","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.23071","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"pith_short_12","alias_value":"GKKRPTQS5RHE","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"pith_short_16","alias_value":"GKKRPTQS5RHEDHZ3","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"pith_short_8","alias_value":"GKKRPTQS","created_at":"2026-06-19T16:12:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:GKKRPTQS5RHEDHZ33SY7SZOWDC","target":"record","payload":{"canonical_record":{"source":{"id":"2511.23071","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-11-28T10:58:37Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"9bac0e4876f7109f11a8e169d2540d863b5e5ed4cdd4a7693c85ee17f8ad266e","abstract_canon_sha256":"d688b4728469a6338d6e132ed1dc1d2fac2a9a374b09b9bc7024e8c9878f698b"},"schema_version":"1.0"},"canonical_sha256":"329517ce12ec4e419f3bdcb1f965d6188f622cbc16c6c61b81246484ce23447a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:49.349472Z","signature_b64":"MXllem+MOA2eLB1HbcSEbMs3P2rZuxEr39lQ+NnHtn3ofWC9fasgBhp4sGb0DH284ucuno518ENwcIZKp4whAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"329517ce12ec4e419f3bdcb1f965d6188f622cbc16c6c61b81246484ce23447a","last_reissued_at":"2026-06-19T16:12:49.349052Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:49.349052Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2511.23071","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:12:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lZn4eql/N24qn8y/llNEtnvJgoqNxixlvdKGW/V+tx1tu16lxbVTQLa+WwvKhD7jd4UjOccBx6aQ73SZzdfLCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T06:57:54.140688Z"},"content_sha256":"4f6af71b3f948b85dd4069ea0e9edfc6acf6099b3226f72b5ec7879e676d11a6","schema_version":"1.0","event_id":"sha256:4f6af71b3f948b85dd4069ea0e9edfc6acf6099b3226f72b5ec7879e676d11a6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:GKKRPTQS5RHEDHZ33SY7SZOWDC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Bharat Scene Text: A Novel Comprehensive Dataset and Benchmark for Indian Language Scene Text Understanding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A new dataset of over 100K words benchmarks scene text recognition for 11 Indian languages and English.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CV","authors_text":"Abhirama Subramanyam Penamakuri, Aditya Rathore, Anand Mishra, Anik De, Devesh Sharma, Harshiv Shah, Pravin Kumar, Rajeev Yadav, Sagar Agarwal","submitted_at":"2025-11-28T10:58:37Z","abstract_excerpt":"Reading scene text, that is, text appearing in images, has numerous application areas, including assistive technology, search, and e-commerce. Although scene text recognition in English has advanced significantly and is often considered nearly a solved problem, Indian language scene text recognition remains an open challenge. This is due to script diversity, non-standard fonts, and varying writing styles, and, more importantly, the lack of high-quality datasets and open-source models. To address these gaps, we introduce the Bharat Scene Text Dataset (BSTD) - a large-scale and comprehensive ben"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"we introduce the Bharat Scene Text Dataset (BSTD) - a large-scale and comprehensive benchmark for studying Indian Language Scene Text Recognition. It comprises more than 100K words that span 11 Indian languages and English, sourced from over 6,500 scene images captured across various linguistic regions of India.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The collected images and annotations are sufficiently diverse, high-quality, and representative of real-world Indian script variations to meaningfully advance recognition performance when English models are fine-tuned on them.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Bharat Scene Text Dataset supplies a large-scale, multi-task benchmark for scene text understanding across 11 Indian languages and English to address data scarcity in non-Latin scripts.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A new dataset of over 100K words benchmarks scene text recognition for 11 Indian languages and English.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"1e1c982eee617f9a320cbaf6a78bad669b0f2c0ace9e60afcc9d45a59e56d1d0"},"source":{"id":"2511.23071","kind":"arxiv","version":2},"verdict":{"id":"dc3d0a5a-4109-4748-a8e4-6a4f4d144550","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T03:53:01.175866Z","strongest_claim":"we introduce the Bharat Scene Text Dataset (BSTD) - a large-scale and comprehensive benchmark for studying Indian Language Scene Text Recognition. It comprises more than 100K words that span 11 Indian languages and English, sourced from over 6,500 scene images captured across various linguistic regions of India.","one_line_summary":"Bharat Scene Text Dataset supplies a large-scale, multi-task benchmark for scene text understanding across 11 Indian languages and English to address data scarcity in non-Latin scripts.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The collected images and annotations are sufficiently diverse, high-quality, and representative of real-world Indian script variations to meaningfully advance recognition performance when English models are fine-tuned on them.","pith_extraction_headline":"A new dataset of over 100K words benchmarks scene text recognition for 11 Indian languages and English."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2511.23071/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"b79147426add9dc61d79179135bfe6b3f8a8594959a5d25459e01088b4d1be42"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"dc3d0a5a-4109-4748-a8e4-6a4f4d144550"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:12:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PrHyGwN06t851GQoz32T2OM/hZAq555Vrs50Eyd6C8UdYfC+/ETx1Mut+EwdFS3dU83mDHgOxTgHLfsFcFhCAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T06:57:54.141143Z"},"content_sha256":"fa2a512359de5c32c1703ed6e287ae45ea5a01b9049443f2e3ba2d1ee580885f","schema_version":"1.0","event_id":"sha256:fa2a512359de5c32c1703ed6e287ae45ea5a01b9049443f2e3ba2d1ee580885f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GKKRPTQS5RHEDHZ33SY7SZOWDC/bundle.json","state_url":"https://pith.science/pith/GKKRPTQS5RHEDHZ33SY7SZOWDC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GKKRPTQS5RHEDHZ33SY7SZOWDC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-28T06:57:54Z","links":{"resolver":"https://pith.science/pith/GKKRPTQS5RHEDHZ33SY7SZOWDC","bundle":"https://pith.science/pith/GKKRPTQS5RHEDHZ33SY7SZOWDC/bundle.json","state":"https://pith.science/pith/GKKRPTQS5RHEDHZ33SY7SZOWDC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GKKRPTQS5RHEDHZ33SY7SZOWDC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:GKKRPTQS5RHEDHZ33SY7SZOWDC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d688b4728469a6338d6e132ed1dc1d2fac2a9a374b09b9bc7024e8c9878f698b","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-11-28T10:58:37Z","title_canon_sha256":"9bac0e4876f7109f11a8e169d2540d863b5e5ed4cdd4a7693c85ee17f8ad266e"},"schema_version":"1.0","source":{"id":"2511.23071","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.23071","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"arxiv_version","alias_value":"2511.23071v2","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.23071","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"pith_short_12","alias_value":"GKKRPTQS5RHE","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"pith_short_16","alias_value":"GKKRPTQS5RHEDHZ3","created_at":"2026-06-19T16:12:49Z"},{"alias_kind":"pith_short_8","alias_value":"GKKRPTQS","created_at":"2026-06-19T16:12:49Z"}],"graph_snapshots":[{"event_id":"sha256:fa2a512359de5c32c1703ed6e287ae45ea5a01b9049443f2e3ba2d1ee580885f","target":"graph","created_at":"2026-06-19T16:12:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"we introduce the Bharat Scene Text Dataset (BSTD) - a large-scale and comprehensive benchmark for studying Indian Language Scene Text Recognition. It comprises more than 100K words that span 11 Indian languages and English, sourced from over 6,500 scene images captured across various linguistic regions of India."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The collected images and annotations are sufficiently diverse, high-quality, and representative of real-world Indian script variations to meaningfully advance recognition performance when English models are fine-tuned on them."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Bharat Scene Text Dataset supplies a large-scale, multi-task benchmark for scene text understanding across 11 Indian languages and English to address data scarcity in non-Latin scripts."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A new dataset of over 100K words benchmarks scene text recognition for 11 Indian languages and English."}],"snapshot_sha256":"1e1c982eee617f9a320cbaf6a78bad669b0f2c0ace9e60afcc9d45a59e56d1d0"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"b79147426add9dc61d79179135bfe6b3f8a8594959a5d25459e01088b4d1be42"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2511.23071/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reading scene text, that is, text appearing in images, has numerous application areas, including assistive technology, search, and e-commerce. Although scene text recognition in English has advanced significantly and is often considered nearly a solved problem, Indian language scene text recognition remains an open challenge. This is due to script diversity, non-standard fonts, and varying writing styles, and, more importantly, the lack of high-quality datasets and open-source models. To address these gaps, we introduce the Bharat Scene Text Dataset (BSTD) - a large-scale and comprehensive ben","authors_text":"Abhirama Subramanyam Penamakuri, Aditya Rathore, Anand Mishra, Anik De, Devesh Sharma, Harshiv Shah, Pravin Kumar, Rajeev Yadav, Sagar Agarwal","cross_cats":["cs.AI","cs.CL"],"headline":"A new dataset of over 100K words benchmarks scene text recognition for 11 Indian languages and English.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-11-28T10:58:37Z","title":"Bharat Scene Text: A Novel Comprehensive Dataset and Benchmark for Indian Language Scene Text Understanding"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2511.23071","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-17T03:53:01.175866Z","id":"dc3d0a5a-4109-4748-a8e4-6a4f4d144550","model_set":{"reader":"grok-4.3"},"one_line_summary":"Bharat Scene Text Dataset supplies a large-scale, multi-task benchmark for scene text understanding across 11 Indian languages and English to address data scarcity in non-Latin scripts.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A new dataset of over 100K words benchmarks scene text recognition for 11 Indian languages and English.","strongest_claim":"we introduce the Bharat Scene Text Dataset (BSTD) - a large-scale and comprehensive benchmark for studying Indian Language Scene Text Recognition. It comprises more than 100K words that span 11 Indian languages and English, sourced from over 6,500 scene images captured across various linguistic regions of India.","weakest_assumption":"The collected images and annotations are sufficiently diverse, high-quality, and representative of real-world Indian script variations to meaningfully advance recognition performance when English models are fine-tuned on them."}},"verdict_id":"dc3d0a5a-4109-4748-a8e4-6a4f4d144550"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4f6af71b3f948b85dd4069ea0e9edfc6acf6099b3226f72b5ec7879e676d11a6","target":"record","created_at":"2026-06-19T16:12:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d688b4728469a6338d6e132ed1dc1d2fac2a9a374b09b9bc7024e8c9878f698b","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-11-28T10:58:37Z","title_canon_sha256":"9bac0e4876f7109f11a8e169d2540d863b5e5ed4cdd4a7693c85ee17f8ad266e"},"schema_version":"1.0","source":{"id":"2511.23071","kind":"arxiv","version":2}},"canonical_sha256":"329517ce12ec4e419f3bdcb1f965d6188f622cbc16c6c61b81246484ce23447a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"329517ce12ec4e419f3bdcb1f965d6188f622cbc16c6c61b81246484ce23447a","first_computed_at":"2026-06-19T16:12:49.349052Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:12:49.349052Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"MXllem+MOA2eLB1HbcSEbMs3P2rZuxEr39lQ+NnHtn3ofWC9fasgBhp4sGb0DH284ucuno518ENwcIZKp4whAg==","signature_status":"signed_v1","signed_at":"2026-06-19T16:12:49.349472Z","signed_message":"canonical_sha256_bytes"},"source_id":"2511.23071","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4f6af71b3f948b85dd4069ea0e9edfc6acf6099b3226f72b5ec7879e676d11a6","sha256:fa2a512359de5c32c1703ed6e287ae45ea5a01b9049443f2e3ba2d1ee580885f"],"state_sha256":"300fb1929127efcb37664eeedef91ec8ce20496efababc00204442a3f430015a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"miA1JAHrHuBTiOMeTrek4UkXm+R/yhyDsnX0ClJeHVPLAD5CqnTxJvKfcqf7huSjc6ld0C2rQK3xDlYI3V4ABg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-28T06:57:54.143697Z","bundle_sha256":"31d647b2c402d880238756db2c02cbe627f93f917e22da63bdb6ff7c7e44f499"}}