{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:4YDNK5Z5TSUJPMLUU27HFUOP5P","short_pith_number":"pith:4YDNK5Z5","schema_version":"1.0","canonical_sha256":"e606d5773d9ca897b174a6be72d1cfebd75c90018a827838b69103e966895d72","source":{"kind":"arxiv","id":"2510.07500","version":3},"attestation_state":"computed","paper":{"title":"Black-Box Detection of LLM-Generated Text Using Generalized Jensen-Shannon Divergence","license":"http://creativecommons.org/licenses/by/4.0/","headline":"SurpMark detects LLM-generated text by comparing the transition patterns of discretized token surprisals to fixed human and machine reference matrices using generalized Jensen-Shannon divergence.","cross_cats":["cs.IT","math.IT"],"primary_cat":"cs.LG","authors_text":"Ashish Khisti, Shuangyi Chen","submitted_at":"2025-10-08T19:53:11Z","abstract_excerpt":"We study black-box detection of machine-generated text under practical constraints: the scoring model (proxy LM) may mismatch the unknown source model, and per-input contrastive generation is costly. We propose SurpMark, a reference-based detector that summarizes a passage by the dynamics of its token surprisals. SurpMark discretizes surprisals into interpretable states, estimates a state-transition matrix for the test text, and scores it via a generalized Jensen-Shannon (GJS) gap between the test transitions and two fixed references (human vs. machine) built once from existing corpora. Theore"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":true},"canonical_record":{"source":{"id":"2510.07500","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-08T19:53:11Z","cross_cats_sorted":["cs.IT","math.IT"],"title_canon_sha256":"ccab6a947b5864440e86c38a5478de5df8fa53645a3d891b075a227b4e3775bc","abstract_canon_sha256":"80e8072731cca731fe40e90ad96b168067d31dab20a4a6f3d3b3c9367a4e1ebb"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:13:48.677945Z","signature_b64":"Q/Ri3ZEXCwDzYAp45bCvOGDUuxaG4TROtbNz26ArCpNGFSABmK+zOf3npwT6n3LmmDBY2y9G3q2Qv3QnoJr1Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e606d5773d9ca897b174a6be72d1cfebd75c90018a827838b69103e966895d72","last_reissued_at":"2026-06-23T03:13:48.677481Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:13:48.677481Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Black-Box Detection of LLM-Generated Text Using Generalized Jensen-Shannon Divergence","license":"http://creativecommons.org/licenses/by/4.0/","headline":"SurpMark detects LLM-generated text by comparing the transition patterns of discretized token surprisals to fixed human and machine reference matrices using generalized Jensen-Shannon divergence.","cross_cats":["cs.IT","math.IT"],"primary_cat":"cs.LG","authors_text":"Ashish Khisti, Shuangyi Chen","submitted_at":"2025-10-08T19:53:11Z","abstract_excerpt":"We study black-box detection of machine-generated text under practical constraints: the scoring model (proxy LM) may mismatch the unknown source model, and per-input contrastive generation is costly. We propose SurpMark, a reference-based detector that summarizes a passage by the dynamics of its token surprisals. SurpMark discretizes surprisals into interpretable states, estimates a state-transition matrix for the test text, and scores it via a generalized Jensen-Shannon (GJS) gap between the test transitions and two fixed references (human vs. machine) built once from existing corpora. Theore"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Empirically, across multiple datasets, source models, and scenarios, SurpMark consistently matches or surpasses baselines, demonstrating strong robustness across domains and generators.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That fixed reference transition matrices built once from existing human and machine corpora remain discriminative even when the test text comes from unseen domains, generators, or when the proxy LM differs substantially from the unknown source model.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SurpMark detects machine-generated text by estimating state-transition matrices from discretized surprisals and scoring them with generalized Jensen-Shannon divergence to human versus machine references.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"SurpMark detects LLM-generated text by comparing the transition patterns of discretized token surprisals to fixed human and machine reference matrices using generalized Jensen-Shannon divergence.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"8ca8101fd3d6646080275a8c92f1eb420eff7934c6cccde1ba14b186d14a5476"},"source":{"id":"2510.07500","kind":"arxiv","version":3},"verdict":{"id":"0e5c2e33-7efd-4829-8859-341495bbd439","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-18T08:41:10.422704Z","strongest_claim":"Empirically, across multiple datasets, source models, and scenarios, SurpMark consistently matches or surpasses baselines, demonstrating strong robustness across domains and generators.","one_line_summary":"SurpMark detects machine-generated text by estimating state-transition matrices from discretized surprisals and scoring them with generalized Jensen-Shannon divergence to human versus machine references.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That fixed reference transition matrices built once from existing human and machine corpora remain discriminative even when the test text comes from unseen domains, generators, or when the proxy LM differs substantially from the unknown source model.","pith_extraction_headline":"SurpMark detects LLM-generated text by comparing the transition patterns of discretized token surprisals to fixed human and machine reference matrices using generalized Jensen-Shannon divergence."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.07500/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"bde6b2f50a6994c764e22ea8a3740f1039610c24ed289e42e4311a88ccbc66c6"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.07500","created_at":"2026-06-23T03:13:48.677545+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.07500v3","created_at":"2026-06-23T03:13:48.677545+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.07500","created_at":"2026-06-23T03:13:48.677545+00:00"},{"alias_kind":"pith_short_12","alias_value":"4YDNK5Z5TSUJ","created_at":"2026-06-23T03:13:48.677545+00:00"},{"alias_kind":"pith_short_16","alias_value":"4YDNK5Z5TSUJPMLU","created_at":"2026-06-23T03:13:48.677545+00:00"},{"alias_kind":"pith_short_8","alias_value":"4YDNK5Z5","created_at":"2026-06-23T03:13:48.677545+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P","json":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P.json","graph_json":"https://pith.science/api/pith-number/4YDNK5Z5TSUJPMLUU27HFUOP5P/graph.json","events_json":"https://pith.science/api/pith-number/4YDNK5Z5TSUJPMLUU27HFUOP5P/events.json","paper":"https://pith.science/paper/4YDNK5Z5"},"agent_actions":{"view_html":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P","download_json":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P.json","view_paper":"https://pith.science/paper/4YDNK5Z5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.07500&json=true","fetch_graph":"https://pith.science/api/pith-number/4YDNK5Z5TSUJPMLUU27HFUOP5P/graph.json","fetch_events":"https://pith.science/api/pith-number/4YDNK5Z5TSUJPMLUU27HFUOP5P/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P/action/storage_attestation","attest_author":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P/action/author_attestation","sign_citation":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P/action/citation_signature","submit_replication":"https://pith.science/pith/4YDNK5Z5TSUJPMLUU27HFUOP5P/action/replication_record"}},"created_at":"2026-06-23T03:13:48.677545+00:00","updated_at":"2026-06-23T03:13:48.677545+00:00"}