{"total":22,"items":[{"citing_arxiv_id":"2606.27884","ref_index":50,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SEADA: An efficient methodology for optimizing mixed-precision DNNs on multi-precision spatial architectures","primary_cat":"cs.AR","submitted_at":"2026-06-26T09:27:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"SEADA introduces an analytical framework combining cost models, mapping tools, and entropy-based precision selection to optimize mixed-precision DNNs on multi-precision spatial architectures.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.27759","ref_index":40,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Layerwise Progressive Freezing: A Training Scaffold for Depth-Scalable Binary Networks","primary_cat":"cs.LG","submitted_at":"2026-06-26T06:37:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"StoMPP progressively binarizes BNN layers layerwise from input to output via stochastic masks, delivering depth-scalable accuracy gains in a fully STE-free regime by controlling activation-induced gradient blockades.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.26822","ref_index":141,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Quantization in Federated Learning: Methods, Challenges and Future Directions","primary_cat":"cs.LG","submitted_at":"2026-06-25T10:03:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"This survey introduces a taxonomy for quantization in federated learning organized around client heterogeneity, aggregation consistency, non-IID robustness, privacy integration, and hardware co-optimization, while analyzing interactions with core FL behaviors.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.22935","ref_index":28,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Hybrid Compression: Integrating Pruning and Quantization for Optimized Neural Networks","primary_cat":"cs.CV","submitted_at":"2026-06-22T07:11:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Hybrid method applies pruning and quantization followed by MoE routing of compressed CNN experts to achieve large reductions in FLOPs and parameters with negligible accuracy loss on benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04945","ref_index":146,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"STaR-Quant: State-Time Consistent Post-Training Quantization for Diffusion Large Language Models","primary_cat":"cs.LG","submitted_at":"2026-06-03T14:34:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"STaR-Quant provides a state-time consistent PTQ framework for DLLMs using SGAT and TAC to improve low-bit weight-activation quantization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04920","ref_index":56,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Toward Multi-Domain and Long-Tailed Quantization via Feature Alignment and Scaling","primary_cat":"cs.LG","submitted_at":"2026-06-03T14:16:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Proposes EmaQ and EmaQ-LT methods for multi-domain and long-tailed DNN quantization with CDF alignment, sensitivity aggregation, class-conditioned scaling, and convergence guarantees, showing strong low-bit results on Office-31, Digits, and long-tailed CIFAR variants.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.25469","ref_index":2,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"JacQuant: STE-Free Quantization-Aware Training via Learned Jacobian Surrogates","primary_cat":"cs.LG","submitted_at":"2026-05-25T06:19:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"JacQuant learns a diagonal or block-diagonal Jacobian surrogate to replace STE in QAT, with convergence proofs and higher accuracy than STE at ≤2 bits on LLMs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22351","ref_index":65,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"QuantSR+: Pushing the Limit of Quantized Image Super-Resolution Networks","primary_cat":"cs.CV","submitted_at":"2026-05-21T11:38:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"QuantSR+ introduces RBD, QSA, and SFD techniques to achieve state-of-the-art accuracy-efficiency trade-offs in 2-4 bit quantized image super-resolution networks, with reported PSNR gains like 0.29 dB on Urban100 for SwinIR-S.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21171","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"FTerViT: Fully Ternary Vision Transformer","primary_cat":"cs.CV","submitted_at":"2026-05-20T13:41:53+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"FTerViT introduces fully ternary Vision Transformers with TernaryBitConv2d and TernaryLayerNorm operators, achieving 82.43% ImageNet top-1 at 6.09 MB with 15x compression.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20289","ref_index":12,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Plug-and-Play Spiking Operators: Breaking the Nonlinearity Bottleneck in Spiking Transformers","primary_cat":"cs.LG","submitted_at":"2026-05-19T06:59:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A modular framework decomposes Transformer nonlinearities into spike-compatible primitives realized via LIF population coding and bit-shift scaling, supporting Softmax, SiLU, and normalization with under 1% accuracy drop in LLMs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10989","ref_index":20,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SURGE: Surrogate Gradient Adaptation in Binary Neural Networks","primary_cat":"cs.LG","submitted_at":"2026-05-09T09:52:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SURGE proposes a dual-path gradient compensator and adaptive gradient scaler to mitigate gradient mismatch in binary neural network training via auxiliary backpropagation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05994","ref_index":16,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"DiBA: Diagonal and Binary Matrix Approximation for Neural Network Weight Compression","primary_cat":"cs.LG","submitted_at":"2026-05-07T10:46:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DiBA factors weight matrices into diagonal-binary-diagonal-binary-diagonal form to cut matrix-vector multiplies from mn to m+k+n operations and improves accuracy on DistilBERT and audio transformer tasks after replacement.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.03396","ref_index":9,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Design and Implementation of BNN-Based Object Detection on FPGA","primary_cat":"cs.AR","submitted_at":"2026-05-05T06:16:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A BNN-based YOLOv3-tiny-like object detector with 1-bit weights and 8-bit activations is implemented in Verilog on FPGA, achieving 39.6% mAP50 on VOC and 0.999964 correlation with the ONNX model in RTL simulation.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":": BinaryConnect: Training Deep Neural Networks with Binary Weights during Propagations. In: NeurIPS (2015). [7] Hubara, I., Courbariaux, M., Soudry, D., et al.: Binarized Neural Networks. arXiv:1602.02830 (2016). [8] Rastegari, M., Ordonez, V., Redmon, J., Farhadi, A.: XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks. In: ECCV, pp. 525-542 (2016). [9] Zhou, S., Wu, Y., Ni, Z., et al.: DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients. arXiv:1606.06160 (2016). [10] Liu, Z., Shen, Z., Savvides, M., Cheng, K.T.: ReActNet: Towards Precise Binary Neural Network with Generalized Activation Functions. arXiv:2003.03488 (2020). [11] Esser, S.K., McKinstry, J."},{"citing_arxiv_id":"2604.25903","ref_index":73,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Carbon-Taxed Transformers: A Green Compression Pipeline for Overgrown Language Models","primary_cat":"cs.SE","submitted_at":"2026-04-28T17:48:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"CTT is a compression pipeline for LLMs that achieves up to 49x memory reduction, 10x faster inference, 81% lower CO2 emissions, and retains 68-98% accuracy on code clone detection, summarization, and generation tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26979","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Multibit neural inference in a N-ary crossbar architecture","primary_cat":"cs.AR","submitted_at":"2026-04-28T13:29:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Simulation of 4-state MTJ crossbars achieves 94.48% MNIST accuracy for neural inference, close to 97.56% software baseline, with analysis showing quantization as primary error and an optimal number of states per cell.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.10861","ref_index":39,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Training single-electron and single-photon stochastic physical neural networks","primary_cat":"quant-ph","submitted_at":"2026-04-12T23:57:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Single-electron and single-photon stochastic physical neural networks achieve over 97% MNIST test accuracy when trained with empirical outputs in the backward pass using few trials per layer.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.04988","ref_index":14,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Prune-Quantize-Distill: An Ordered Pipeline for Efficient Neural Network Compression","primary_cat":"cs.LG","submitted_at":"2026-04-05T06:13:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"The prune-quantize-distill ordering produces a better accuracy-size-latency frontier on CIFAR-10/100 than any single technique or other orderings, with INT8 QAT providing the main runtime gain.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2509.03472","ref_index":54,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DPQuant: Efficient and Differentially-Private Model Training via Dynamic Quantization Scheduling","primary_cat":"cs.LG","submitted_at":"2025-09-03T16:51:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DPQuant uses epoch-wise probabilistic layer rotation and DP loss sensitivity to quantize only a changing subset of layers, reducing accuracy degradation from quantization noise in DP-SGD and delivering up to 2.21x throughput gains with under 2% accuracy drop.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2408.00923","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Reclaiming Residual Knowledge: A Novel Paradigm to Low-Bit Quantization","primary_cat":"cs.CV","submitted_at":"2024-08-01T21:27:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CoRa reclaims quantization residuals in pre-trained ConvNets by searching low-rank adapter architectures instead of weights, matching SOTA accuracy on ImageNet in 3-4 bit settings with under 250 iterations on 1600 images.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2209.05433","ref_index":26,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"FP8 Formats for Deep Learning","primary_cat":"cs.LG","submitted_at":"2022-09-12T17:39:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"FP8 formats E4M3 and E5M2 match 16-bit training accuracy on CNNs, RNNs, and Transformers up to 175B parameters without hyperparameter changes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"1907.00593","ref_index":35,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Weight Normalization based Quantization for Deep Neural Network Compression","primary_cat":"cs.LG","submitted_at":"2019-07-01T07:59:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"WNQ uses weight normalization to reshape weight distributions and reduce quantization error, outperforming baselines on CIFAR-100 and ImageNet.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"1710.03740","ref_index":34,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Mixed Precision Training","primary_cat":"cs.AI","submitted_at":"2017-10-10T17:42:04+00:00","verdict":"ACCEPT","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Mixed precision training uses FP16 for most computations, FP32 master weights for accumulation, and loss scaling to enable accurate training of large DNNs with halved memory usage.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}