{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:C46NGGTTDZ3SDKZYW55Z57NFCR","short_pith_number":"pith:C46NGGTT","schema_version":"1.0","canonical_sha256":"173cd31a731e7721ab38b77b9efda5147da2b3a82c31d1f88c5f54909435cb85","source":{"kind":"arxiv","id":"2605.28190","version":1},"attestation_state":"computed","paper":{"title":"The Harder Text Embedding Benchmark (HTEB): Beyond One-dimensional Static Robustness","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Haithem Afli, Manuel Frank","submitted_at":"2026-05-27T09:11:13Z","abstract_excerpt":"Embedding benchmarks like MTEB report a single score per model, implicitly treating robustness as a static, scalar property. We argue that embedding robustness is multidimensional, since models respond differently to different types of variation, and requires dynamic evaluation to expose failures hidden by static benchmarks. We introduce the Harder Text Embedding Benchmark (HTEB), a dynamic evaluation framework that challenges model robustness along three practically interpretable axes (Lexical/Stylistic, Length and Language) by stochastically transforming inputs at evaluation time with an LLM"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.28190","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-27T09:11:13Z","cross_cats_sorted":[],"title_canon_sha256":"4ac2a34b8bad185b76c19a269cf3424291b883c998483f93670b168da8f997d9","abstract_canon_sha256":"85e0adb09b7a88db3d553eb2a7a7122292f314d154e0211db43fd5659f9cfbc9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:05:01.726007Z","signature_b64":"22ddspDx4mxA2v9WE5AHvBdeDIOtlZz9MDI+v5ogodW9qeJ1ApTDhm1V8lBzdft4MCKIeC0h9vWZs/SZoOBLAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"173cd31a731e7721ab38b77b9efda5147da2b3a82c31d1f88c5f54909435cb85","last_reissued_at":"2026-05-28T01:05:01.725482Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:05:01.725482Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Harder Text Embedding Benchmark (HTEB): Beyond One-dimensional Static Robustness","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Haithem Afli, Manuel Frank","submitted_at":"2026-05-27T09:11:13Z","abstract_excerpt":"Embedding benchmarks like MTEB report a single score per model, implicitly treating robustness as a static, scalar property. We argue that embedding robustness is multidimensional, since models respond differently to different types of variation, and requires dynamic evaluation to expose failures hidden by static benchmarks. We introduce the Harder Text Embedding Benchmark (HTEB), a dynamic evaluation framework that challenges model robustness along three practically interpretable axes (Lexical/Stylistic, Length and Language) by stochastically transforming inputs at evaluation time with an LLM"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.28190","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.28190/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.28190","created_at":"2026-05-28T01:05:01.725542+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.28190v1","created_at":"2026-05-28T01:05:01.725542+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.28190","created_at":"2026-05-28T01:05:01.725542+00:00"},{"alias_kind":"pith_short_12","alias_value":"C46NGGTTDZ3S","created_at":"2026-05-28T01:05:01.725542+00:00"},{"alias_kind":"pith_short_16","alias_value":"C46NGGTTDZ3SDKZY","created_at":"2026-05-28T01:05:01.725542+00:00"},{"alias_kind":"pith_short_8","alias_value":"C46NGGTT","created_at":"2026-05-28T01:05:01.725542+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR","json":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR.json","graph_json":"https://pith.science/api/pith-number/C46NGGTTDZ3SDKZYW55Z57NFCR/graph.json","events_json":"https://pith.science/api/pith-number/C46NGGTTDZ3SDKZYW55Z57NFCR/events.json","paper":"https://pith.science/paper/C46NGGTT"},"agent_actions":{"view_html":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR","download_json":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR.json","view_paper":"https://pith.science/paper/C46NGGTT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.28190&json=true","fetch_graph":"https://pith.science/api/pith-number/C46NGGTTDZ3SDKZYW55Z57NFCR/graph.json","fetch_events":"https://pith.science/api/pith-number/C46NGGTTDZ3SDKZYW55Z57NFCR/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR/action/timestamp_anchor","attest_storage":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR/action/storage_attestation","attest_author":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR/action/author_attestation","sign_citation":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR/action/citation_signature","submit_replication":"https://pith.science/pith/C46NGGTTDZ3SDKZYW55Z57NFCR/action/replication_record"}},"created_at":"2026-05-28T01:05:01.725542+00:00","updated_at":"2026-05-28T01:05:01.725542+00:00"}