{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:5HS32WKTEI46XSDITP2WKBG2BT","short_pith_number":"pith:5HS32WKT","schema_version":"1.0","canonical_sha256":"e9e5bd59532239ebc8689bf56504da0cf72ff0750be0e2c466295a7ae03c697c","source":{"kind":"arxiv","id":"1802.05438","version":5},"attestation_state":"computed","paper":{"title":"Mean Field Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.MA","authors_text":"Jun Wang, Ming Zhou, Minne Li, Rui Luo, Weinan Zhang, Yaodong Yang","submitted_at":"2018-02-15T09:07:57Z","abstract_excerpt":"Existing multi-agent reinforcement learning methods are limited typically to a small number of agents. When the agent number increases largely, the learning becomes intractable due to the curse of the dimensionality and the exponential growth of agent interactions. In this paper, we present \\emph{Mean Field Reinforcement Learning} where the interactions within the population of agents are approximated by those between a single agent and the average effect from the overall population or neighboring agents; the interplay between the two entities is mutually reinforced: the learning of the indivi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1802.05438","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MA","submitted_at":"2018-02-15T09:07:57Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"9798d5b299fba6fc0f60e9bffeb8b80b011f1ee31a5567b892b25375b2487cb3","abstract_canon_sha256":"91c4ac4d30db6f8ffa74190feff6df2a055ff6b7ff9f7047814817a2ee1baf23"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T01:59:25.210618Z","signature_b64":"AVScxT4rEgjkKqOnpMNi1J/lttlj0n8Y2hkehi0v+w5ezohxzSiiytfwPhbXIDlSZ8ryYCNRw93Oef79nAtKAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e9e5bd59532239ebc8689bf56504da0cf72ff0750be0e2c466295a7ae03c697c","last_reissued_at":"2026-07-05T01:59:25.210255Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T01:59:25.210255Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mean Field Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.MA","authors_text":"Jun Wang, Ming Zhou, Minne Li, Rui Luo, Weinan Zhang, Yaodong Yang","submitted_at":"2018-02-15T09:07:57Z","abstract_excerpt":"Existing multi-agent reinforcement learning methods are limited typically to a small number of agents. When the agent number increases largely, the learning becomes intractable due to the curse of the dimensionality and the exponential growth of agent interactions. In this paper, we present \\emph{Mean Field Reinforcement Learning} where the interactions within the population of agents are approximated by those between a single agent and the average effect from the overall population or neighboring agents; the interplay between the two entities is mutually reinforced: the learning of the indivi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1802.05438","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/1802.05438/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1802.05438","created_at":"2026-07-05T01:59:25.210308+00:00"},{"alias_kind":"arxiv_version","alias_value":"1802.05438v5","created_at":"2026-07-05T01:59:25.210308+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1802.05438","created_at":"2026-07-05T01:59:25.210308+00:00"},{"alias_kind":"pith_short_12","alias_value":"5HS32WKTEI46","created_at":"2026-07-05T01:59:25.210308+00:00"},{"alias_kind":"pith_short_16","alias_value":"5HS32WKTEI46XSDI","created_at":"2026-07-05T01:59:25.210308+00:00"},{"alias_kind":"pith_short_8","alias_value":"5HS32WKT","created_at":"2026-07-05T01:59:25.210308+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT","json":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT.json","graph_json":"https://pith.science/api/pith-number/5HS32WKTEI46XSDITP2WKBG2BT/graph.json","events_json":"https://pith.science/api/pith-number/5HS32WKTEI46XSDITP2WKBG2BT/events.json","paper":"https://pith.science/paper/5HS32WKT"},"agent_actions":{"view_html":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT","download_json":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT.json","view_paper":"https://pith.science/paper/5HS32WKT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1802.05438&json=true","fetch_graph":"https://pith.science/api/pith-number/5HS32WKTEI46XSDITP2WKBG2BT/graph.json","fetch_events":"https://pith.science/api/pith-number/5HS32WKTEI46XSDITP2WKBG2BT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT/action/storage_attestation","attest_author":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT/action/author_attestation","sign_citation":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT/action/citation_signature","submit_replication":"https://pith.science/pith/5HS32WKTEI46XSDITP2WKBG2BT/action/replication_record"}},"created_at":"2026-07-05T01:59:25.210308+00:00","updated_at":"2026-07-05T01:59:25.210308+00:00"}