{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:O4RUHQ6FO7NAIIIKB4HIKDISHV","short_pith_number":"pith:O4RUHQ6F","canonical_record":{"source":{"id":"2605.14438","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-14T06:33:41Z","cross_cats_sorted":[],"title_canon_sha256":"2619a11dffbed1535207813ea1019ce681a99d86d6f9dca78f18cba0e2efe99a","abstract_canon_sha256":"f55f67de57dd4f29b5d41dca7d04ce921475bdd72c6d0553c4a886129a031cd6"},"schema_version":"1.0"},"canonical_sha256":"772343c3c577da04210a0f0e850d123d5ab13c3d5508fac0d34dbf7b580dff8e","source":{"kind":"arxiv","id":"2605.14438","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14438","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14438v1","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14438","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"pith_short_12","alias_value":"O4RUHQ6FO7NA","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"O4RUHQ6FO7NAIIIK","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"O4RUHQ6F","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:O4RUHQ6FO7NAIIIKB4HIKDISHV","target":"record","payload":{"canonical_record":{"source":{"id":"2605.14438","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-14T06:33:41Z","cross_cats_sorted":[],"title_canon_sha256":"2619a11dffbed1535207813ea1019ce681a99d86d6f9dca78f18cba0e2efe99a","abstract_canon_sha256":"f55f67de57dd4f29b5d41dca7d04ce921475bdd72c6d0553c4a886129a031cd6"},"schema_version":"1.0"},"canonical_sha256":"772343c3c577da04210a0f0e850d123d5ab13c3d5508fac0d34dbf7b580dff8e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:07.054058Z","signature_b64":"IPjC3GYDblAX+LHkYEaKrUAguzIwQ7T6+voWOmTalPS9sBgYuU8VoIuIOjeLKVXc5Sfae9+J1pBfAoEtzE4BDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"772343c3c577da04210a0f0e850d123d5ab13c3d5508fac0d34dbf7b580dff8e","last_reissued_at":"2026-05-17T23:39:07.053456Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:07.053456Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.14438","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YEgtTHrbjm1+MiNiUzYuaQpDEQ01lVdSrefDiu7qkTHuM7Zs7hW9ubzPeNZFW9qKPwRXZ8H0G950I79NVAfJCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T23:43:13.610021Z"},"content_sha256":"d2cf398c34555be7025f4ea741e4bcf8bd547113df2d5c96546d1226b6201d00","schema_version":"1.0","event_id":"sha256:d2cf398c34555be7025f4ea741e4bcf8bd547113df2d5c96546d1226b6201d00"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:O4RUHQ6FO7NAIIIKB4HIKDISHV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"BEAM: Binary Expert Activation Masking for Dynamic Routing in MoE","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"Trainable binary masks let MoE models pick experts token-by-token, cutting expert-layer FLOPs by up to 85 percent while keeping more than 98 percent of original accuracy.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Fuyu Lv, Jialiang Cheng, Juntong Wu, Li Yuan, Ou Dan, Qishen Yin, Yue Dai, Yuliang Yan","submitted_at":"2026-05-14T06:33:41Z","abstract_excerpt":"Mixture-of-Experts (MoE) architectures enhance the efficiency of large language models by activating only a subset of experts per token. However, standard MoE employs a fixed Top-K routing strategy, leading to redundant computation and suboptimal inference latency. Existing acceleration methods either require costly retraining with architectural changes or suffer from severe performance drop at high sparsity due to train-inference mismatch. To address these limitations, we propose BEAM (Binary Expert Activation Masking), a novel method that learns token-adaptive expert selection via trainable "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"BEAM retains over 98% of the original model's performance while reducing MoE layer FLOPs by up to 85%, achieving up to 2.5× faster decoding and 1.4× higher throughput, as a practical plug-and-play solution.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the binary masks learned during training will generalize well to inference without significant mismatch, and that the straight-through estimator combined with the auxiliary loss can induce effective sparsity without degrading model capability.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"BEAM uses binary expert activation masks trained end-to-end to achieve dynamic sparsity in MoE models, cutting FLOPs by 85% with over 98% performance retention.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Trainable binary masks let MoE models pick experts token-by-token, cutting expert-layer FLOPs by up to 85 percent while keeping more than 98 percent of original accuracy.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"025546b93eb4c03000c0af869dcf3ecdfd4721fdccbe18ba8d103656f1a5cacd"},"source":{"id":"2605.14438","kind":"arxiv","version":1},"verdict":{"id":"8f6cfee4-f40e-4595-8e61-c6f61280a7d1","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T01:55:47.778168Z","strongest_claim":"BEAM retains over 98% of the original model's performance while reducing MoE layer FLOPs by up to 85%, achieving up to 2.5× faster decoding and 1.4× higher throughput, as a practical plug-and-play solution.","one_line_summary":"BEAM uses binary expert activation masks trained end-to-end to achieve dynamic sparsity in MoE models, cutting FLOPs by 85% with over 98% performance retention.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the binary masks learned during training will generalize well to inference without significant mismatch, and that the straight-through estimator combined with the auxiliary loss can induce effective sparsity without degrading model capability.","pith_extraction_headline":"Trainable binary masks let MoE models pick experts token-by-token, cutting expert-layer FLOPs by up to 85 percent while keeping more than 98 percent of original accuracy."},"references":{"count":25,"sample":[{"doi":"","year":null,"title":"Da-moe: Towards dy- namic expert allocation for mixture-of-experts models","work_id":"5fd4b7b5-86f8-48ca-b804-d3a3ae6abd11","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"ConfLayers: Adaptive Confidence-based Layer Skipping for Self-Speculative Decoding","work_id":"a1255ddd-2b2e-4f7b-bbb5-ea72ee18bfcf","ref_index":2,"cited_arxiv_id":"2604.14612","is_internal_anchor":true},{"doi":"","year":null,"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","ref_index":3,"cited_arxiv_id":"2309.16609","is_internal_anchor":true},{"doi":"","year":null,"title":"Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation","work_id":"1fe8c7c8-aff7-4b94-9096-e549d7e60789","ref_index":4,"cited_arxiv_id":"1308.3432","is_internal_anchor":true},{"doi":"","year":2019,"title":"BoolQ: Exploring the surprising difficulty of natural yes/no questions","work_id":"8d3d9bd8-a118-422a-a695-404ed9e21211","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":25,"snapshot_sha256":"63db9dbba5e37299dd7bf8bfe014982d21c651820c9be4abc6a0239f9c9d37ad","internal_anchors":9},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"8f6cfee4-f40e-4595-8e61-c6f61280a7d1"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"r70wnpr9J5RvfhIyLJmojs6bO49XoC7VlG08d+oVcJW4nxNYrnT+6pxluT9injYaBK53yFFd8z9W0avKuqb5BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T23:43:13.611042Z"},"content_sha256":"c1cc70cc75a1a86b7b4f15ae63941e27b3eeb535c9ab85fbea9d08d600922385","schema_version":"1.0","event_id":"sha256:c1cc70cc75a1a86b7b4f15ae63941e27b3eeb535c9ab85fbea9d08d600922385"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/O4RUHQ6FO7NAIIIKB4HIKDISHV/bundle.json","state_url":"https://pith.science/pith/O4RUHQ6FO7NAIIIKB4HIKDISHV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/O4RUHQ6FO7NAIIIKB4HIKDISHV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T23:43:13Z","links":{"resolver":"https://pith.science/pith/O4RUHQ6FO7NAIIIKB4HIKDISHV","bundle":"https://pith.science/pith/O4RUHQ6FO7NAIIIKB4HIKDISHV/bundle.json","state":"https://pith.science/pith/O4RUHQ6FO7NAIIIKB4HIKDISHV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/O4RUHQ6FO7NAIIIKB4HIKDISHV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:O4RUHQ6FO7NAIIIKB4HIKDISHV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f55f67de57dd4f29b5d41dca7d04ce921475bdd72c6d0553c4a886129a031cd6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-14T06:33:41Z","title_canon_sha256":"2619a11dffbed1535207813ea1019ce681a99d86d6f9dca78f18cba0e2efe99a"},"schema_version":"1.0","source":{"id":"2605.14438","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14438","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14438v1","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14438","created_at":"2026-05-17T23:39:07Z"},{"alias_kind":"pith_short_12","alias_value":"O4RUHQ6FO7NA","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"O4RUHQ6FO7NAIIIK","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"O4RUHQ6F","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:c1cc70cc75a1a86b7b4f15ae63941e27b3eeb535c9ab85fbea9d08d600922385","target":"graph","created_at":"2026-05-17T23:39:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"BEAM retains over 98% of the original model's performance while reducing MoE layer FLOPs by up to 85%, achieving up to 2.5× faster decoding and 1.4× higher throughput, as a practical plug-and-play solution."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the binary masks learned during training will generalize well to inference without significant mismatch, and that the straight-through estimator combined with the auxiliary loss can induce effective sparsity without degrading model capability."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"BEAM uses binary expert activation masks trained end-to-end to achieve dynamic sparsity in MoE models, cutting FLOPs by 85% with over 98% performance retention."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Trainable binary masks let MoE models pick experts token-by-token, cutting expert-layer FLOPs by up to 85 percent while keeping more than 98 percent of original accuracy."}],"snapshot_sha256":"025546b93eb4c03000c0af869dcf3ecdfd4721fdccbe18ba8d103656f1a5cacd"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Mixture-of-Experts (MoE) architectures enhance the efficiency of large language models by activating only a subset of experts per token. However, standard MoE employs a fixed Top-K routing strategy, leading to redundant computation and suboptimal inference latency. Existing acceleration methods either require costly retraining with architectural changes or suffer from severe performance drop at high sparsity due to train-inference mismatch. To address these limitations, we propose BEAM (Binary Expert Activation Masking), a novel method that learns token-adaptive expert selection via trainable ","authors_text":"Fuyu Lv, Jialiang Cheng, Juntong Wu, Li Yuan, Ou Dan, Qishen Yin, Yue Dai, Yuliang Yan","cross_cats":[],"headline":"Trainable binary masks let MoE models pick experts token-by-token, cutting expert-layer FLOPs by up to 85 percent while keeping more than 98 percent of original accuracy.","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-14T06:33:41Z","title":"BEAM: Binary Expert Activation Masking for Dynamic Routing in MoE"},"references":{"count":25,"internal_anchors":9,"resolved_work":25,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Da-moe: Towards dy- namic expert allocation for mixture-of-experts models","work_id":"5fd4b7b5-86f8-48ca-b804-d3a3ae6abd11","year":null},{"cited_arxiv_id":"2604.14612","doi":"","is_internal_anchor":true,"ref_index":2,"title":"ConfLayers: Adaptive Confidence-based Layer Skipping for Self-Speculative Decoding","work_id":"a1255ddd-2b2e-4f7b-bbb5-ea72ee18bfcf","year":null},{"cited_arxiv_id":"2309.16609","doi":"","is_internal_anchor":true,"ref_index":3,"title":"Qwen Technical Report","work_id":"bb1fd52f-6b2f-437c-9516-37bdf6eb9be8","year":null},{"cited_arxiv_id":"1308.3432","doi":"","is_internal_anchor":true,"ref_index":4,"title":"Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation","work_id":"1fe8c7c8-aff7-4b94-9096-e549d7e60789","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"BoolQ: Exploring the surprising difficulty of natural yes/no questions","work_id":"8d3d9bd8-a118-422a-a695-404ed9e21211","year":2019}],"snapshot_sha256":"63db9dbba5e37299dd7bf8bfe014982d21c651820c9be4abc6a0239f9c9d37ad"},"source":{"id":"2605.14438","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T01:55:47.778168Z","id":"8f6cfee4-f40e-4595-8e61-c6f61280a7d1","model_set":{"reader":"grok-4.3"},"one_line_summary":"BEAM uses binary expert activation masks trained end-to-end to achieve dynamic sparsity in MoE models, cutting FLOPs by 85% with over 98% performance retention.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Trainable binary masks let MoE models pick experts token-by-token, cutting expert-layer FLOPs by up to 85 percent while keeping more than 98 percent of original accuracy.","strongest_claim":"BEAM retains over 98% of the original model's performance while reducing MoE layer FLOPs by up to 85%, achieving up to 2.5× faster decoding and 1.4× higher throughput, as a practical plug-and-play solution.","weakest_assumption":"That the binary masks learned during training will generalize well to inference without significant mismatch, and that the straight-through estimator combined with the auxiliary loss can induce effective sparsity without degrading model capability."}},"verdict_id":"8f6cfee4-f40e-4595-8e61-c6f61280a7d1"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d2cf398c34555be7025f4ea741e4bcf8bd547113df2d5c96546d1226b6201d00","target":"record","created_at":"2026-05-17T23:39:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f55f67de57dd4f29b5d41dca7d04ce921475bdd72c6d0553c4a886129a031cd6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-14T06:33:41Z","title_canon_sha256":"2619a11dffbed1535207813ea1019ce681a99d86d6f9dca78f18cba0e2efe99a"},"schema_version":"1.0","source":{"id":"2605.14438","kind":"arxiv","version":1}},"canonical_sha256":"772343c3c577da04210a0f0e850d123d5ab13c3d5508fac0d34dbf7b580dff8e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"772343c3c577da04210a0f0e850d123d5ab13c3d5508fac0d34dbf7b580dff8e","first_computed_at":"2026-05-17T23:39:07.053456Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:07.053456Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"IPjC3GYDblAX+LHkYEaKrUAguzIwQ7T6+voWOmTalPS9sBgYuU8VoIuIOjeLKVXc5Sfae9+J1pBfAoEtzE4BDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:07.054058Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14438","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d2cf398c34555be7025f4ea741e4bcf8bd547113df2d5c96546d1226b6201d00","sha256:c1cc70cc75a1a86b7b4f15ae63941e27b3eeb535c9ab85fbea9d08d600922385"],"state_sha256":"697942eabb9a7a25e654676132540572aed3ddbbcf46e7d25db5c70762175814"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pPVTDbXNlOGWfCv3Q1GYSksbbZVfUq7BjJJl4t3QlW8NUnDc2E28r6anLcJgDqpIswXbBM8rSl6T2lbbz8GaBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T23:43:13.614911Z","bundle_sha256":"87407c16ce2664c08a369a3c866fc8bd9fb54478bbde0e359bb86ad86eb44b96"}}