{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:EBO4TVBBM2MVD6EHDI4LRQYGO3","short_pith_number":"pith:EBO4TVBB","schema_version":"1.0","canonical_sha256":"205dc9d421669951f8871a38b8c30676c3e624bad2f01604aff59d33925eee52","source":{"kind":"arxiv","id":"2506.22141","version":2},"attestation_state":"computed","paper":{"title":"DAPFAM: A Domain-Aware Family-level Dataset to benchmark cross domain patent retrieval","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CL","authors_text":"Denis Cavallucci (ICube), Hicham Chibane (ICube), Iliass Ayaou (ICube)","submitted_at":"2025-06-27T11:34:51Z","abstract_excerpt":"Patent prior-art retrieval becomes especially challenging when relevant disclosures cross technological boundaries. Existing benchmarks lack explicit domain partitions, making it difficult to assess how retrieval systems cope with such shifts. We introduce DAPFAM, a family-level benchmark with explicit IN-domain and OUT-domain partitions defined by a new IPC3 overlap scheme. The dataset contains 1,247 query families and 45,336 target families aggregated at the family level to reduce international redundancy, with citation based relevance judgments. We conduct 249 controlled experiments spannin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2506.22141","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-06-27T11:34:51Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"1e5957e227740b67f05597ef56c356e7c1ac871c883f1da13b291e2b840477c2","abstract_canon_sha256":"498b1007128666c96156453cb960ddea7fcba9be0fd6de633b54a951c9df6b27"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T01:09:14.853687Z","signature_b64":"JHaiHgzx6GPSRkdgCtCr6UIB76UYkb/okHVt8csuf7ClqYvJOhza2ic8IQ5VBSTjz5MOTRLmYoxk1A0++EmwBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"205dc9d421669951f8871a38b8c30676c3e624bad2f01604aff59d33925eee52","last_reissued_at":"2026-06-11T01:09:14.852711Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T01:09:14.852711Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"DAPFAM: A Domain-Aware Family-level Dataset to benchmark cross domain patent retrieval","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CL","authors_text":"Denis Cavallucci (ICube), Hicham Chibane (ICube), Iliass Ayaou (ICube)","submitted_at":"2025-06-27T11:34:51Z","abstract_excerpt":"Patent prior-art retrieval becomes especially challenging when relevant disclosures cross technological boundaries. Existing benchmarks lack explicit domain partitions, making it difficult to assess how retrieval systems cope with such shifts. We introduce DAPFAM, a family-level benchmark with explicit IN-domain and OUT-domain partitions defined by a new IPC3 overlap scheme. The dataset contains 1,247 query families and 45,336 target families aggregated at the family level to reduce international redundancy, with citation based relevance judgments. We conduct 249 controlled experiments spannin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.22141","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2506.22141/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2506.22141","created_at":"2026-06-11T01:09:14.852855+00:00"},{"alias_kind":"arxiv_version","alias_value":"2506.22141v2","created_at":"2026-06-11T01:09:14.852855+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.22141","created_at":"2026-06-11T01:09:14.852855+00:00"},{"alias_kind":"pith_short_12","alias_value":"EBO4TVBBM2MV","created_at":"2026-06-11T01:09:14.852855+00:00"},{"alias_kind":"pith_short_16","alias_value":"EBO4TVBBM2MVD6EH","created_at":"2026-06-11T01:09:14.852855+00:00"},{"alias_kind":"pith_short_8","alias_value":"EBO4TVBB","created_at":"2026-06-11T01:09:14.852855+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2604.22897","citing_title":"Citation-Driven Multi-View Training for Patent Embeddings: QaECTER and Sophia-Bench","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18882","citing_title":"Formally Verified Patent Analysis via Dependent Type Theory: Machine-Checkable Certificates from a Hybrid AI + Lean 4 Pipeline","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02392","citing_title":"Is It Novel and Why? Fine-Grained Patent Novelty Prediction Based on Passage Retrieval","ref_index":7,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3","json":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3.json","graph_json":"https://pith.science/api/pith-number/EBO4TVBBM2MVD6EHDI4LRQYGO3/graph.json","events_json":"https://pith.science/api/pith-number/EBO4TVBBM2MVD6EHDI4LRQYGO3/events.json","paper":"https://pith.science/paper/EBO4TVBB"},"agent_actions":{"view_html":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3","download_json":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3.json","view_paper":"https://pith.science/paper/EBO4TVBB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2506.22141&json=true","fetch_graph":"https://pith.science/api/pith-number/EBO4TVBBM2MVD6EHDI4LRQYGO3/graph.json","fetch_events":"https://pith.science/api/pith-number/EBO4TVBBM2MVD6EHDI4LRQYGO3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3/action/storage_attestation","attest_author":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3/action/author_attestation","sign_citation":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3/action/citation_signature","submit_replication":"https://pith.science/pith/EBO4TVBBM2MVD6EHDI4LRQYGO3/action/replication_record"}},"created_at":"2026-06-11T01:09:14.852855+00:00","updated_at":"2026-06-11T01:09:14.852855+00:00"}