{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:XHRQXUERWOBFBW4D6KTH2VQRN3","short_pith_number":"pith:XHRQXUER","canonical_record":{"source":{"id":"2605.05253","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-05T20:23:38Z","cross_cats_sorted":[],"title_canon_sha256":"205f9f9c4b51ea56cf5d260f0cd75b11eaf5a8a0065546fbb84c8c93dd8044ff","abstract_canon_sha256":"2667f75ac3234a8db463535b608b0cd23315241544b0840e31ab0d297bc30d4b"},"schema_version":"1.0"},"canonical_sha256":"b9e30bd091b38250db83f2a67d56116eef37206988e7773fd50b96880ad73fa4","source":{"kind":"arxiv","id":"2605.05253","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.05253","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"arxiv_version","alias_value":"2605.05253v2","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.05253","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"pith_short_12","alias_value":"XHRQXUERWOBF","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"pith_short_16","alias_value":"XHRQXUERWOBFBW4D","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"pith_short_8","alias_value":"XHRQXUER","created_at":"2026-05-21T01:04:26Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:XHRQXUERWOBFBW4D6KTH2VQRN3","target":"record","payload":{"canonical_record":{"source":{"id":"2605.05253","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-05T20:23:38Z","cross_cats_sorted":[],"title_canon_sha256":"205f9f9c4b51ea56cf5d260f0cd75b11eaf5a8a0065546fbb84c8c93dd8044ff","abstract_canon_sha256":"2667f75ac3234a8db463535b608b0cd23315241544b0840e31ab0d297bc30d4b"},"schema_version":"1.0"},"canonical_sha256":"b9e30bd091b38250db83f2a67d56116eef37206988e7773fd50b96880ad73fa4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:04:26.889507Z","signature_b64":"9QN/TRnXzO41eyQCPtHA+rcnREyCVkqYAEL2cnX4dI13DVYsfKoiRHPAsxl1zDnqK2wIR7M5dJtvvLXSJFjzBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b9e30bd091b38250db83f2a67d56116eef37206988e7773fd50b96880ad73fa4","last_reissued_at":"2026-05-21T01:04:26.888551Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:04:26.888551Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.05253","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"p22u9BAetlY2KMfNeJXlKHOlcX0Ap8/Be14/WVcOsTNlZFyNzRVQ5acsi0THlEGH04KoLu53fFJYHgb89Nh+Dg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T09:25:16.499827Z"},"content_sha256":"7bbd62f9bdd9dab55160b40e882069fe26658810b970c1e37efb80e743a8e51d","schema_version":"1.0","event_id":"sha256:7bbd62f9bdd9dab55160b40e882069fe26658810b970c1e37efb80e743a8e51d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:XHRQXUERWOBFBW4D6KTH2VQRN3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"EnterpriseRAG-Bench: A RAG Benchmark for Company Internal Knowledge","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A new benchmark supplies 500,000 synthetic enterprise documents and 500 questions to evaluate retrieval-augmented generation on company-internal knowledge.","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Chris Weaver, Joachim Rahmfeld, Mark H. Butler, Roshan Desai, Weijia Chen, Wenxi Huang, Yuhong Sun","submitted_at":"2026-05-05T20:23:38Z","abstract_excerpt":"Retrieval-Augmented Generation (RAG) has become the standard approach for grounding large language models in information that was not available during training. While existing datasets and benchmarks focus on web or other public sources, there is still no widely adopted dataset that realistically reflects the nature of company-internal knowledge. Meanwhile, startups, enterprises, and researchers are increasingly developing AI Agents designed to operate over exactly this kind of proprietary data. To close this gap, we release a synthetic enterprise corpus, its generation framework, and a leader"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We present EnterpriseRAG-Bench, a dataset consisting of approximately 500,000 documents spanning nine enterprise source types and 500 questions across ten categories that test distinct retrieval and reasoning capabilities.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The synthetic corpus with cross-document coherence and added noise such as misfiled documents and conflicting information realistically reflects real company-internal knowledge.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"EnterpriseRAG-Bench supplies a synthetic corpus of 500,000 documents across Slack, Gmail, GitHub and other tools plus 500 questions that probe lookup, multi-document reasoning, conflict resolution and absence detection.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A new benchmark supplies 500,000 synthetic enterprise documents and 500 questions to evaluate retrieval-augmented generation on company-internal knowledge.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"453fc850d43ddb584b8895803d492f42cf1f17305ecc718456a9574e57c0a92c"},"source":{"id":"2605.05253","kind":"arxiv","version":2},"verdict":{"id":"57d4be32-58b2-4e01-aa20-dd5c650237c1","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-08T17:11:14.488888Z","strongest_claim":"We present EnterpriseRAG-Bench, a dataset consisting of approximately 500,000 documents spanning nine enterprise source types and 500 questions across ten categories that test distinct retrieval and reasoning capabilities.","one_line_summary":"EnterpriseRAG-Bench supplies a synthetic corpus of 500,000 documents across Slack, Gmail, GitHub and other tools plus 500 questions that probe lookup, multi-document reasoning, conflict resolution and absence detection.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The synthetic corpus with cross-document coherence and added noise such as misfiled documents and conflicting information realistically reflects real company-internal knowledge.","pith_extraction_headline":"A new benchmark supplies 500,000 synthetic enterprise documents and 500 questions to evaluate retrieval-augmented generation on company-internal knowledge."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.05253/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-20T12:35:32.981053Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T23:31:20.777337Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T14:38:20.587400Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"48a391d6ce5d769ee8ab0bdc67d9e861e9d19b11fb3bacab9bc5726e03559f23"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"57d4be32-58b2-4e01-aa20-dd5c650237c1"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RY4JtBWwwWPgI3mheJZ9+zOd5q3zZpQ04iBlMC6rEY+v7DaLemHQ86tuY0lD4i2pal6DRmuQCDKfsO/a51riCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T09:25:16.500311Z"},"content_sha256":"cbd8390e5adf9874c5c835f61bfa1ad89ab7cf3945d48b55e1068b04d85a0f2b","schema_version":"1.0","event_id":"sha256:cbd8390e5adf9874c5c835f61bfa1ad89ab7cf3945d48b55e1068b04d85a0f2b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XHRQXUERWOBFBW4D6KTH2VQRN3/bundle.json","state_url":"https://pith.science/pith/XHRQXUERWOBFBW4D6KTH2VQRN3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XHRQXUERWOBFBW4D6KTH2VQRN3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T09:25:16Z","links":{"resolver":"https://pith.science/pith/XHRQXUERWOBFBW4D6KTH2VQRN3","bundle":"https://pith.science/pith/XHRQXUERWOBFBW4D6KTH2VQRN3/bundle.json","state":"https://pith.science/pith/XHRQXUERWOBFBW4D6KTH2VQRN3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XHRQXUERWOBFBW4D6KTH2VQRN3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:XHRQXUERWOBFBW4D6KTH2VQRN3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2667f75ac3234a8db463535b608b0cd23315241544b0840e31ab0d297bc30d4b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-05T20:23:38Z","title_canon_sha256":"205f9f9c4b51ea56cf5d260f0cd75b11eaf5a8a0065546fbb84c8c93dd8044ff"},"schema_version":"1.0","source":{"id":"2605.05253","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.05253","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"arxiv_version","alias_value":"2605.05253v2","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.05253","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"pith_short_12","alias_value":"XHRQXUERWOBF","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"pith_short_16","alias_value":"XHRQXUERWOBFBW4D","created_at":"2026-05-21T01:04:26Z"},{"alias_kind":"pith_short_8","alias_value":"XHRQXUER","created_at":"2026-05-21T01:04:26Z"}],"graph_snapshots":[{"event_id":"sha256:cbd8390e5adf9874c5c835f61bfa1ad89ab7cf3945d48b55e1068b04d85a0f2b","target":"graph","created_at":"2026-05-21T01:04:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We present EnterpriseRAG-Bench, a dataset consisting of approximately 500,000 documents spanning nine enterprise source types and 500 questions across ten categories that test distinct retrieval and reasoning capabilities."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The synthetic corpus with cross-document coherence and added noise such as misfiled documents and conflicting information realistically reflects real company-internal knowledge."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"EnterpriseRAG-Bench supplies a synthetic corpus of 500,000 documents across Slack, Gmail, GitHub and other tools plus 500 questions that probe lookup, multi-document reasoning, conflict resolution and absence detection."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A new benchmark supplies 500,000 synthetic enterprise documents and 500 questions to evaluate retrieval-augmented generation on company-internal knowledge."}],"snapshot_sha256":"453fc850d43ddb584b8895803d492f42cf1f17305ecc718456a9574e57c0a92c"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-20T12:35:32.981053Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T23:31:20.777337Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T14:38:20.587400Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.05253/integrity.json","findings":[],"snapshot_sha256":"48a391d6ce5d769ee8ab0bdc67d9e861e9d19b11fb3bacab9bc5726e03559f23","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Retrieval-Augmented Generation (RAG) has become the standard approach for grounding large language models in information that was not available during training. While existing datasets and benchmarks focus on web or other public sources, there is still no widely adopted dataset that realistically reflects the nature of company-internal knowledge. Meanwhile, startups, enterprises, and researchers are increasingly developing AI Agents designed to operate over exactly this kind of proprietary data. To close this gap, we release a synthetic enterprise corpus, its generation framework, and a leader","authors_text":"Chris Weaver, Joachim Rahmfeld, Mark H. Butler, Roshan Desai, Weijia Chen, Wenxi Huang, Yuhong Sun","cross_cats":[],"headline":"A new benchmark supplies 500,000 synthetic enterprise documents and 500 questions to evaluate retrieval-augmented generation on company-internal knowledge.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-05T20:23:38Z","title":"EnterpriseRAG-Bench: A RAG Benchmark for Company Internal Knowledge"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.05253","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-08T17:11:14.488888Z","id":"57d4be32-58b2-4e01-aa20-dd5c650237c1","model_set":{"reader":"grok-4.3"},"one_line_summary":"EnterpriseRAG-Bench supplies a synthetic corpus of 500,000 documents across Slack, Gmail, GitHub and other tools plus 500 questions that probe lookup, multi-document reasoning, conflict resolution and absence detection.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A new benchmark supplies 500,000 synthetic enterprise documents and 500 questions to evaluate retrieval-augmented generation on company-internal knowledge.","strongest_claim":"We present EnterpriseRAG-Bench, a dataset consisting of approximately 500,000 documents spanning nine enterprise source types and 500 questions across ten categories that test distinct retrieval and reasoning capabilities.","weakest_assumption":"The synthetic corpus with cross-document coherence and added noise such as misfiled documents and conflicting information realistically reflects real company-internal knowledge."}},"verdict_id":"57d4be32-58b2-4e01-aa20-dd5c650237c1"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7bbd62f9bdd9dab55160b40e882069fe26658810b970c1e37efb80e743a8e51d","target":"record","created_at":"2026-05-21T01:04:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2667f75ac3234a8db463535b608b0cd23315241544b0840e31ab0d297bc30d4b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-05T20:23:38Z","title_canon_sha256":"205f9f9c4b51ea56cf5d260f0cd75b11eaf5a8a0065546fbb84c8c93dd8044ff"},"schema_version":"1.0","source":{"id":"2605.05253","kind":"arxiv","version":2}},"canonical_sha256":"b9e30bd091b38250db83f2a67d56116eef37206988e7773fd50b96880ad73fa4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b9e30bd091b38250db83f2a67d56116eef37206988e7773fd50b96880ad73fa4","first_computed_at":"2026-05-21T01:04:26.888551Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:04:26.888551Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9QN/TRnXzO41eyQCPtHA+rcnREyCVkqYAEL2cnX4dI13DVYsfKoiRHPAsxl1zDnqK2wIR7M5dJtvvLXSJFjzBg==","signature_status":"signed_v1","signed_at":"2026-05-21T01:04:26.889507Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.05253","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7bbd62f9bdd9dab55160b40e882069fe26658810b970c1e37efb80e743a8e51d","sha256:cbd8390e5adf9874c5c835f61bfa1ad89ab7cf3945d48b55e1068b04d85a0f2b"],"state_sha256":"cc6110ae7bab88cc7b37df9a3b0d3029e14c278120acebc72b182cf322a0a5d3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8lOjEbcC+VX91sMxrYfrNFoqYVkDYnEUbjybj0YQi8PK/00VXcNjtQfu6jnnSwOfAE5xf9Rv5zpfm3qiuLoKAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T09:25:16.502515Z","bundle_sha256":"3ad4f1136358cfbe0be5f4c9b8825da9843fbe486cc12ba679a14bf5a0c8b6e9"}}