{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:YMO22FR2JFLZC74QYDXDLKSPTW","short_pith_number":"pith:YMO22FR2","schema_version":"1.0","canonical_sha256":"c31dad163a4957917f90c0ee35aa4f9db202bf58ba6fc405ee42594e1ed38dd3","source":{"kind":"arxiv","id":"2606.05874","version":1},"attestation_state":"computed","paper":{"title":"Evaluating Stochastic Collapse and Implicit Bias in Multimodal Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Boyang Wang, Hongcheng Guo, Houtao Zhang, Huiyuan Zheng, Qingyi Si","submitted_at":"2026-06-04T08:47:15Z","abstract_excerpt":"Current evaluations for Multimodal Large Language Models (MLLMs) overwhelmingly focus on utility-driven objectives, leaving model behavior under logic-neutral scenarios largely underexplored. Stochasticity is essential in scenarios where multiple actions are equally valid, such as recommending travel itineraries or daily schedules where multiple options have similar utility. In such settings, deterministic policies may lead to repetitive behaviors and reduced coverage of valid alternatives. To bridge this gap, we propose RandomBench, a benchmark designed to evaluate whether MLLMs can maintain "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.05874","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-04T08:47:15Z","cross_cats_sorted":[],"title_canon_sha256":"54ac6aca650eee9f6dce4adfb4836a58f000f37bb149467064f9958a96917e05","abstract_canon_sha256":"74768fd2311d870dfff06f868d864c54f969b2e2b52216c0ac75a08ede9cf738"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:06.486531Z","signature_b64":"8ke0duWq7esa1DGTwDFAfwu2W+5xHRH3l1A5U7lKMQzKBieXR3OGYDOadKK2CjnfgiMrPCsJuvWbvw0v5P54BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c31dad163a4957917f90c0ee35aa4f9db202bf58ba6fc405ee42594e1ed38dd3","last_reissued_at":"2026-06-05T01:15:06.486129Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:06.486129Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Evaluating Stochastic Collapse and Implicit Bias in Multimodal Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Boyang Wang, Hongcheng Guo, Houtao Zhang, Huiyuan Zheng, Qingyi Si","submitted_at":"2026-06-04T08:47:15Z","abstract_excerpt":"Current evaluations for Multimodal Large Language Models (MLLMs) overwhelmingly focus on utility-driven objectives, leaving model behavior under logic-neutral scenarios largely underexplored. Stochasticity is essential in scenarios where multiple actions are equally valid, such as recommending travel itineraries or daily schedules where multiple options have similar utility. In such settings, deterministic policies may lead to repetitive behaviors and reduced coverage of valid alternatives. To bridge this gap, we propose RandomBench, a benchmark designed to evaluate whether MLLMs can maintain "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.05874","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.05874/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.05874","created_at":"2026-06-05T01:15:06.486189+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.05874v1","created_at":"2026-06-05T01:15:06.486189+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.05874","created_at":"2026-06-05T01:15:06.486189+00:00"},{"alias_kind":"pith_short_12","alias_value":"YMO22FR2JFLZ","created_at":"2026-06-05T01:15:06.486189+00:00"},{"alias_kind":"pith_short_16","alias_value":"YMO22FR2JFLZC74Q","created_at":"2026-06-05T01:15:06.486189+00:00"},{"alias_kind":"pith_short_8","alias_value":"YMO22FR2","created_at":"2026-06-05T01:15:06.486189+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW","json":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW.json","graph_json":"https://pith.science/api/pith-number/YMO22FR2JFLZC74QYDXDLKSPTW/graph.json","events_json":"https://pith.science/api/pith-number/YMO22FR2JFLZC74QYDXDLKSPTW/events.json","paper":"https://pith.science/paper/YMO22FR2"},"agent_actions":{"view_html":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW","download_json":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW.json","view_paper":"https://pith.science/paper/YMO22FR2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.05874&json=true","fetch_graph":"https://pith.science/api/pith-number/YMO22FR2JFLZC74QYDXDLKSPTW/graph.json","fetch_events":"https://pith.science/api/pith-number/YMO22FR2JFLZC74QYDXDLKSPTW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW/action/storage_attestation","attest_author":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW/action/author_attestation","sign_citation":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW/action/citation_signature","submit_replication":"https://pith.science/pith/YMO22FR2JFLZC74QYDXDLKSPTW/action/replication_record"}},"created_at":"2026-06-05T01:15:06.486189+00:00","updated_at":"2026-06-05T01:15:06.486189+00:00"}