{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:4CJSATOGOWUPEUWTQGWKQU6WB7","short_pith_number":"pith:4CJSATOG","schema_version":"1.0","canonical_sha256":"e093204dc675a8f252d381aca853d60ff78a4aeadba00e4a9621a4d1ee5d65b8","source":{"kind":"arxiv","id":"2606.12385","version":1},"attestation_state":"computed","paper":{"title":"Which Models Are Our Models Built On? Auditing Invisible Dependencies in Modern LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Haoxiang Sun, Sanjay Adhikesaven, Sewon Min","submitted_at":"2026-06-10T17:47:59Z","abstract_excerpt":"Modern LLM training pipelines increasingly rely on other models to generate data, filter corpora, judge outputs, and guide development decisions. These dependencies are recursive: a model may depend on an upstream artifact whose own dependencies are documented only in separate releases and artifacts. As a result, the full dependency structure is fragmented across heterogeneous public artifacts, with complexity and recursive depth far outpacing humans' ability to trace. We introduce ModSleuth, an agentic system that recursively reconstructs LLM dependency graphs from public artifacts with sourc"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.12385","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T17:47:59Z","cross_cats_sorted":[],"title_canon_sha256":"e8eb5de7615eaf8dd2d73bce167df2ed0af833bfb8c37e296a13d1fbbd25ec92","abstract_canon_sha256":"251cb911af46b83230aeac1f867e03c7e2509887704f1e616efb3f8fe25b6eb3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T02:09:49.319287Z","signature_b64":"cB+N/7G4mAcytz3xvJDRed6KXE4S0lXUY7IG8Z+LOB1dCMrxoXlIseifHMNZwRXqFRE0O7zQtXERTrLAQNeJBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e093204dc675a8f252d381aca853d60ff78a4aeadba00e4a9621a4d1ee5d65b8","last_reissued_at":"2026-06-11T02:09:49.318774Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T02:09:49.318774Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Which Models Are Our Models Built On? Auditing Invisible Dependencies in Modern LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Haoxiang Sun, Sanjay Adhikesaven, Sewon Min","submitted_at":"2026-06-10T17:47:59Z","abstract_excerpt":"Modern LLM training pipelines increasingly rely on other models to generate data, filter corpora, judge outputs, and guide development decisions. These dependencies are recursive: a model may depend on an upstream artifact whose own dependencies are documented only in separate releases and artifacts. As a result, the full dependency structure is fragmented across heterogeneous public artifacts, with complexity and recursive depth far outpacing humans' ability to trace. We introduce ModSleuth, an agentic system that recursively reconstructs LLM dependency graphs from public artifacts with sourc"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.12385","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.12385/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.12385","created_at":"2026-06-11T02:09:49.318866+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.12385v1","created_at":"2026-06-11T02:09:49.318866+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.12385","created_at":"2026-06-11T02:09:49.318866+00:00"},{"alias_kind":"pith_short_12","alias_value":"4CJSATOGOWUP","created_at":"2026-06-11T02:09:49.318866+00:00"},{"alias_kind":"pith_short_16","alias_value":"4CJSATOGOWUPEUWT","created_at":"2026-06-11T02:09:49.318866+00:00"},{"alias_kind":"pith_short_8","alias_value":"4CJSATOG","created_at":"2026-06-11T02:09:49.318866+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7","json":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7.json","graph_json":"https://pith.science/api/pith-number/4CJSATOGOWUPEUWTQGWKQU6WB7/graph.json","events_json":"https://pith.science/api/pith-number/4CJSATOGOWUPEUWTQGWKQU6WB7/events.json","paper":"https://pith.science/paper/4CJSATOG"},"agent_actions":{"view_html":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7","download_json":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7.json","view_paper":"https://pith.science/paper/4CJSATOG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.12385&json=true","fetch_graph":"https://pith.science/api/pith-number/4CJSATOGOWUPEUWTQGWKQU6WB7/graph.json","fetch_events":"https://pith.science/api/pith-number/4CJSATOGOWUPEUWTQGWKQU6WB7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7/action/storage_attestation","attest_author":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7/action/author_attestation","sign_citation":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7/action/citation_signature","submit_replication":"https://pith.science/pith/4CJSATOGOWUPEUWTQGWKQU6WB7/action/replication_record"}},"created_at":"2026-06-11T02:09:49.318866+00:00","updated_at":"2026-06-11T02:09:49.318866+00:00"}