{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2022:LEOW5YXO5L6CZGMLQ3ID7HBGBL","short_pith_number":"pith:LEOW5YXO","schema_version":"1.0","canonical_sha256":"591d6ee2eeeafc2c998b86d03f9c260aec93574f3bfe6d44a3707d501ca75b9a","source":{"kind":"arxiv","id":"2202.10574","version":4},"attestation_state":"computed","paper":{"title":"A Multi-Agent Reinforcement Learning Framework for Off-Policy Evaluation in Two-sided Markets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Chengchun Shi, Ge Song, Hongtu Zhu, Rui Song, Runzhe Wan, Shikai Luo","submitted_at":"2022-02-21T23:36:40Z","abstract_excerpt":"The two-sided markets such as ride-sharing companies often involve a group of subjects who are making sequential decisions across time and/or location. With the rapid development of smart phones and internet of things, they have substantially transformed the transportation landscape of human beings. In this paper we consider large-scale fleet management in ride-sharing companies that involve multiple units in different areas receiving sequences of products (or treatments) over time. Major technical challenges, such as policy evaluation, arise in those studies because (i) spatial and temporal p"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2202.10574","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2022-02-21T23:36:40Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"a30ca67c4cbeac3768eb6bbb302490fb603d45b393640eb21b45b7fd4bab5823","abstract_canon_sha256":"2d48f16b7e801ffbc5d9c42e63754e76c71fe7dfbfbef973d96f0804e8d60390"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T05:54:28.160492Z","signature_b64":"NRefj0WalHJhTaM9RNQG63XBi6u9Zrza4EWg7xmpa+2nT5376QTgr7RwN4kYt1s+sIG9T5Pa2Qqd5s4G3LQwBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"591d6ee2eeeafc2c998b86d03f9c260aec93574f3bfe6d44a3707d501ca75b9a","last_reissued_at":"2026-07-05T05:54:28.160012Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T05:54:28.160012Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Multi-Agent Reinforcement Learning Framework for Off-Policy Evaluation in Two-sided Markets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Chengchun Shi, Ge Song, Hongtu Zhu, Rui Song, Runzhe Wan, Shikai Luo","submitted_at":"2022-02-21T23:36:40Z","abstract_excerpt":"The two-sided markets such as ride-sharing companies often involve a group of subjects who are making sequential decisions across time and/or location. With the rapid development of smart phones and internet of things, they have substantially transformed the transportation landscape of human beings. In this paper we consider large-scale fleet management in ride-sharing companies that involve multiple units in different areas receiving sequences of products (or treatments) over time. Major technical challenges, such as policy evaluation, arise in those studies because (i) spatial and temporal p"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2202.10574","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2202.10574/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2202.10574","created_at":"2026-07-05T05:54:28.160073+00:00"},{"alias_kind":"arxiv_version","alias_value":"2202.10574v4","created_at":"2026-07-05T05:54:28.160073+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2202.10574","created_at":"2026-07-05T05:54:28.160073+00:00"},{"alias_kind":"pith_short_12","alias_value":"LEOW5YXO5L6C","created_at":"2026-07-05T05:54:28.160073+00:00"},{"alias_kind":"pith_short_16","alias_value":"LEOW5YXO5L6CZGML","created_at":"2026-07-05T05:54:28.160073+00:00"},{"alias_kind":"pith_short_8","alias_value":"LEOW5YXO","created_at":"2026-07-05T05:54:28.160073+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2502.16156","citing_title":"A Review of Causal Decision Making","ref_index":34,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL","json":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL.json","graph_json":"https://pith.science/api/pith-number/LEOW5YXO5L6CZGMLQ3ID7HBGBL/graph.json","events_json":"https://pith.science/api/pith-number/LEOW5YXO5L6CZGMLQ3ID7HBGBL/events.json","paper":"https://pith.science/paper/LEOW5YXO"},"agent_actions":{"view_html":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL","download_json":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL.json","view_paper":"https://pith.science/paper/LEOW5YXO","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2202.10574&json=true","fetch_graph":"https://pith.science/api/pith-number/LEOW5YXO5L6CZGMLQ3ID7HBGBL/graph.json","fetch_events":"https://pith.science/api/pith-number/LEOW5YXO5L6CZGMLQ3ID7HBGBL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL/action/storage_attestation","attest_author":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL/action/author_attestation","sign_citation":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL/action/citation_signature","submit_replication":"https://pith.science/pith/LEOW5YXO5L6CZGMLQ3ID7HBGBL/action/replication_record"}},"created_at":"2026-07-05T05:54:28.160073+00:00","updated_at":"2026-07-05T05:54:28.160073+00:00"}