{"total":49,"items":[{"citing_arxiv_id":"2605.13493","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"PhysEditBench: A Protocol-Conditioned Benchmark for Dense Physical-Map Prediction with Image Editors","primary_cat":"cs.CV","submitted_at":"2026-05-13T13:17:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PhysEditBench is a protocol-conditioned benchmark evaluating image editors on dense prediction of depth, normal, albedo, roughness, and metallic maps from RGB images using curated data and fixed scoring rules.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13153","ref_index":60,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Strikingness-Aware Evaluation for Temporal Knowledge Graph Reasoning","primary_cat":"cs.AI","submitted_at":"2026-05-13T08:17:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A rule-based strikingness measure is added to TKGR metrics to weight rare events higher, revealing that models weaken on striking events and ensemble gains come mostly from trivial fits.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12952","ref_index":5,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Debunking Grad-ECLIP: A Comprehensive Study on Its Incorrectness and Fundamental Principles for Model Interpretation","primary_cat":"cs.CV","submitted_at":"2026-05-13T03:35:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Grad-ECLIP is an equivalent but flawed variant of attention-based interpretation, with two principles proposed to ensure model explanations reflect the original model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12168","ref_index":76,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"On What We Can Learn from Low-Resolution Data","primary_cat":"cs.LG","submitted_at":"2026-05-12T14:16:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Low-resolution data improves high-resolution model performance when high-resolution samples are limited, via KL-divergence bounds and experiments on vision transformers and CNNs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11913","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Vector Scaffolding: Inter-Scale Orchestration for Differentiable Image Vectorization","primary_cat":"cs.CV","submitted_at":"2026-05-12T10:27:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Vector Scaffolding uses Interior Gradient Aggregation, Progressive Stratification, and Rapid Inflation Scheduling to achieve 2.5x faster optimization and up to 1.4 dB higher PSNR in differentiable vectorization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11718","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Self-organized MT Direction Maps Emerge from Spatiotemporal Contrastive Optimization","primary_cat":"q-bio.NC","submitted_at":"2026-05-12T08:05:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Direction maps and pinwheel structures in MT emerge spontaneously when a spatiotemporal deep network is trained on videos with contrastive self-supervised learning and spatial regularization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11714","ref_index":56,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Introducing Environmental Constraints to Grasping Strategies for Paper-Like Flexible Materials Using a Soft Gripper","primary_cat":"cs.RO","submitted_at":"2026-05-12T08:03:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Systematic grasping strategies for paper-like materials are developed and tested with a soft gripper by exploiting environmental constraints to improve force control and success rates.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11508","ref_index":51,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"LiBrA-Net: Lie-Algebraic Bilateral Affine Fields for Real-Time 4K Video Dehazing","primary_cat":"cs.CV","submitted_at":"2026-05-12T04:27:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LiBrA-Net achieves real-time native 4K video dehazing via Lie-algebraic bilateral affine fields and releases the first 4K paired dehazing video benchmark with per-frame annotations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10922","ref_index":28,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Pixal3D: Pixel-Aligned 3D Generation from Images","primary_cat":"cs.CV","submitted_at":"2026-05-11T17:55:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Pixal3D performs pixel-aligned 3D generation from images via back-projected multi-scale feature volumes, achieving fidelity close to reconstruction while supporting multi-view and scene synthesis.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09677","ref_index":53,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"VFM-SDM: A vision foundation model-based framework for training-free, marker-free, and calibration-free structural displacement measurement","primary_cat":"cs.CV","submitted_at":"2026-05-10T17:51:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"VFM-SDM enables accurate multi-directional structural displacement measurement from video using pre-trained vision models for camera estimation and point tracking, combined with geometry constraints, without task-specific training or preparation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09422","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Perception Without Engagement: Dissecting the Causal Discovery Deficit in LMMs","primary_cat":"cs.CL","submitted_at":"2026-05-10T08:48:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LMMs perceive videos but underexploit visual content for causal reasoning due to textual shortcuts; ProCauEval diagnoses this and ADPO training reduces reliance on priors.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08577","ref_index":31,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Improving Generative Adversarial Networks with Self-Distillation","primary_cat":"cs.CV","submitted_at":"2026-05-09T00:40:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SD-GAN uses the EMA generator as a teacher to distill perceptual knowledge to the training generator, improving FID scores, stabilizing training, and providing guidance uncorrelated with standard adversarial loss.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06927","ref_index":40,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"XiYOLO: Energy-Aware Object Detection via Iterative Architecture Search and Scaling","primary_cat":"cs.CV","submitted_at":"2026-05-07T20:38:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"XiYOLO uses iterative energy-aware neural architecture search and scaling to produce object detectors with stronger accuracy-energy tradeoffs than YOLO baselines on GPUs and NPUs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05908","ref_index":31,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Architecture-agnostic Lipschitz-constant Bayesian header and its application to resolve semantically proximal classification errors with vision transformers","primary_cat":"cs.CV","submitted_at":"2026-05-07T09:18:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LipB-ViT adds bi-Lipschitz Bayesian layers to vision transformers and uses uncertainty-aware fusion to identify corrupted labels with over 93% recall at 15% noise, beating kNN baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08175","ref_index":23,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"KARMA-MV: A Benchmark for Causal Question Answering on Music Videos","primary_cat":"cs.CV","submitted_at":"2026-05-05T06:48:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"KARMA-MV is a new benchmark showing that causal knowledge graphs improve VLMs on causal audio-visual reasoning in music videos.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01817","ref_index":41,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Skipping the Zeros in Diffusion Models for Sparse Data Generation","primary_cat":"cs.LG","submitted_at":"2026-05-03T10:51:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SED modifies diffusion models to generate only non-zero values in sparse data, preserving sparsity patterns, cutting computation, and matching or beating standard DM performance on benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01543","ref_index":28,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Physics-Guided Deep Learning For High Resolution X-ray Imaging","primary_cat":"eess.SP","submitted_at":"2026-05-02T17:15:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Physics-guided U-Net removes non-stationary artifacts from X-ray images, raising mean SSIM from 0.345 to 0.906 and 0.0679 to 0.945 in synthetic tests while preserving filament profiles better than Fourier filtering or DFFN.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01217","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Asymmetric Invertible Threat: Learning Reversible Privacy Defense for Face Recognition","primary_cat":"cs.CV","submitted_at":"2026-05-02T03:18:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ARFP is a key-conditioned reversible face cloaking method that resists unauthorized restoration attacks while enabling authorized recovery with tamper indication.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01165","ref_index":16,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"CEZSAR: A Contrastive Embedding Method for Zero-Shot Action Recognition","primary_cat":"cs.CV","submitted_at":"2026-05-01T23:47:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CEZSAR uses contrastive learning to align video and sentence embeddings with automatic negative sampling, claiming state-of-the-art zero-shot action recognition on UCF-101 and Kinetics-400.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01075","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Neighbor2Inverse: Self-Supervised Denoising for Low-Dose Region-of-Interest Phase Contrast CT","primary_cat":"cs.CV","submitted_at":"2026-05-01T20:17:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Neighbor2Inverse adapts the Neighbor2Neighbor principle to train a denoising network directly in the image domain for low-dose PBI-CT by using independently noised subsampled projections.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00562","ref_index":7,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Depth-Guided Privacy-Preserving Visual Localization Using 3D Sphere Clouds","primary_cat":"cs.CV","submitted_at":"2026-05-01T10:59:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Sphere clouds neutralize density attacks on private 3D maps for visual localization while depth guidance from ToF sensors restores translation scale for accurate pose estimation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00256","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Remote SAMsing: From Segment Anything to Segment Everything","primary_cat":"cs.CV","submitted_at":"2026-04-30T21:44:03+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Remote SAMsing pipeline boosts SAM2 coverage on remote sensing scenes from 30-68% to 91-98% via multi-pass masking and boundary-aware merging while preserving mask quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27958","ref_index":7,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"TripVVT: A Large-Scale Triplet Dataset and a Coarse-Mask Baseline for In-the-Wild Video Virtual Try-On","primary_cat":"cs.CV","submitted_at":"2026-04-30T14:53:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A new large-scale triplet dataset and diffusion transformer model using coarse human masks deliver improved video virtual try-on quality and generalization in challenging real-world conditions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27712","ref_index":39,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Linguistically Informed Multimodal Fusion for Vietnamese Scene-Text Image Captioning: Dataset, Graph Framework, and Phonological Attention","primary_cat":"cs.CV","submitted_at":"2026-04-30T10:57:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"UNKNOWN","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces ViTextCaps dataset and PhonoSTFG phonological graph fusion framework for Vietnamese scene-text image captioning, showing cross-modal graph edges harm performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20614","ref_index":14,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Too Sharp, Too Sure: When Calibration Follows Curvature","primary_cat":"cs.LG","submitted_at":"2026-04-22T14:28:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Calibration error tracks curvature via shared margin-dependent exponential tails; a margin-aware objective improves out-of-sample calibration across optimizers.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20474","ref_index":43,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Random Walk on Point Clouds for Feature Detection","primary_cat":"cs.CV","submitted_at":"2026-04-22T12:02:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RWoDSN extracts feature points from point clouds via a novel DSN descriptor and random walk graph analysis, reporting 22% higher recall than prior state-of-the-art with 0.784 precision.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19339","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Divide-and-Conquer Approach to Holistic Cognition in High-Similarity Contexts with Limited Data","primary_cat":"cs.CV","submitted_at":"2026-04-21T11:17:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DHCNet improves ultra-fine-grained visual categorization by progressively building holistic cognition from local discrepancies using self-shuffling and refinement on limited data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18208","ref_index":10,"ref_count":3,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Towards Symmetry-sensitive Pose Estimation: A Rotation Representation for Symmetric Object Classes","primary_cat":"cs.CV","submitted_at":"2026-04-20T12:55:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SARR modifies trigonometric rotation encodings with object symmetry orders to produce unique continuous poses, enabling standard CNNs to outperform existing methods on symmetry-aware 6D pose estimation without custom losses or 3D models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17688","ref_index":17,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Dual-stream Spatio-Temporal GCN-Transformer Network for 3D Human Pose Estimation","primary_cat":"cs.CV","submitted_at":"2026-04-20T01:07:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MixTGFormer reports state-of-the-art 3D pose estimation errors of 37.6 mm on Human3.6M and 15.7 mm on MPI-INF-3DHP by using parallel GCN-Transformer streams with SE layers for local-global feature fusion.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17208","ref_index":36,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"CDSA-Net:Collaborative Decoupling of Vascular Structure and Background for High-Fidelity Coronary Digital Subtraction Angiography","primary_cat":"cs.CV","submitted_at":"2026-04-19T02:35:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CDSA-Net decouples vascular structure extraction and background restoration in coronary DSA via hierarchical geometric priors and adaptive noise modeling to eliminate artifacts while preserving tissue fidelity.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16919","ref_index":1,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Noise-Adaptive Diffusion Sampling for Inverse Problems Without Task-Specific Tuning","primary_cat":"cs.LG","submitted_at":"2026-04-18T08:53:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"N-HMC performs posterior sampling entirely in the initial noise space of diffusion models to solve inverse problems more robustly, with NA-NHMC adapting to unknown noise levels and outperforming prior methods on multiple tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15451","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Weak-to-Strong Knowledge Distillation Accelerates Visual Learning","primary_cat":"cs.CV","submitted_at":"2026-04-16T18:10:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Weak-to-strong knowledge distillation applied early and then turned off accelerates convergence to target performance in visual learning tasks by factors of 1.7-4.8x.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15184","ref_index":16,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Agent-Aided Design for Dynamic CAD Models","primary_cat":"cs.AI","submitted_at":"2026-04-16T16:15:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AADvark extends agent-aided CAD design to dynamic 3D assemblies with movable parts by integrating constraint solvers and visual feedback to create a verification signal for the agent.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.14849","ref_index":1,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Efficient Search of Implantable Adaptive Cells for Medical Image Segmentation","primary_cat":"cs.CV","submitted_at":"2026-04-16T10:34:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"IAC-LTH accelerates IAC search for medical segmentation by progressively pruning unstable operations via Jensen-Shannon divergence on per-edge importance distributions, delivering comparable patient-level Dice scores with substantially lower wall-clock cost.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.14338","ref_index":7,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Path-Sampled Integrated Gradients","primary_cat":"cs.LG","submitted_at":"2026-04-15T18:46:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Path-sampled integrated gradients generalizes integrated gradients by averaging gradients over sampled baselines on the linear path, proving equivalence to a weighted version that improves convergence rate to O(m^{-1}) and reduces variance by a factor of 1/3 under uniform sampling.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.14287","ref_index":144,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Quantum-inspired tensor networks in machine learning models","primary_cat":"cs.LG","submitted_at":"2026-04-15T18:00:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"Tensor networks developed for quantum states are reviewed as tools for machine learning models, with assessment of their potential computational, explanatory, and privacy advantages alongside remaining challenges.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.13792","ref_index":26,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Orthogonal Transformations for Efficient Data-Driven Reachability Analysis","primary_cat":"eess.SY","submitted_at":"2026-04-15T12:31:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Orthogonal transformations before order reduction in matrix zonotopes produce order-of-magnitude smaller reachable set volumes while keeping generator counts comparable.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12344","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"FRTSearch: Unified Detection and Parameter Inference of Fast Radio Transients using Instance Segmentation","primary_cat":"astro-ph.IM","submitted_at":"2026-04-14T06:31:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"FRTSearch reframes fast radio transient detection as instance segmentation on dynamic spectra and uses the segmented shapes to infer dispersion measure and time of arrival, achieving 98% recall with over 99.9% fewer false positives than traditional methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.11685","ref_index":39,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Unfolding 3D Gaussian Splatting via Iterative Gaussian Synopsis","primary_cat":"cs.CV","submitted_at":"2026-04-13T16:28:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Iterative Gaussian Synopsis creates compact multi-level LOD hierarchies for 3D Gaussian Splatting via top-down unfolding with adaptive pruning, preserving quality while cutting storage.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.10586","ref_index":11,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Preventing Latent Rehearsal Decay in Online Continual SSL with SOLAR","primary_cat":"cs.LG","submitted_at":"2026-04-12T11:11:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SOLAR prevents latent rehearsal decay in online continual SSL by adaptively managing replay buffers with deviation proxies and an explicit overlap loss, delivering both fast convergence and state-of-the-art final accuracy on vision benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08405","ref_index":62,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SyncBreaker:Stage-Aware Multimodal Adversarial Attacks on Audio-Driven Talking Head Generation","primary_cat":"cs.CV","submitted_at":"2026-04-09T16:03:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SyncBreaker jointly attacks image and audio streams with Multi-Interval Sampling and Cross-Attention Fooling to degrade speech-driven talking head generation more than single-modality baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08230","ref_index":39,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Generalization Under Scrutiny: Cross-Domain Detection Progresses, Pitfalls, and Persistent Challenges","primary_cat":"cs.CV","submitted_at":"2026-04-09T13:21:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"A survey that organizes methods for cross-domain object detection into a taxonomy, analyzes domain shift across detection stages, and outlines persistent challenges.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.05256","ref_index":52,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Protecting and Preserving Protest Dynamics for Responsible Analysis","primary_cat":"cs.CV","submitted_at":"2026-04-06T23:46:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A responsible computing framework substitutes real protest imagery with labeled synthetic reproductions from conditional image synthesis to enable privacy-aware analysis of collective action patterns.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09694","ref_index":32,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"EDFNet: Early Fusion of Edge and Depth for Thin-Obstacle Segmentation in UAV Navigation","primary_cat":"cs.CV","submitted_at":"2026-04-06T15:26:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Early RGB-Depth-Edge fusion in EDFNet provides a competitive baseline for thin-obstacle segmentation on the DDOS dataset, with the best pretrained U-Net model reaching 0.244 Thin-Structure Evaluation Score.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.03339","ref_index":4,"ref_count":3,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Hierarchical Awareness Adapters with Hybrid Pyramid Feature Fusion for Dense Depth Prediction","primary_cat":"cs.CV","submitted_at":"2026-04-03T07:59:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A multilevel perceptual CRF model using Swin Transformer, HPF fusion, HA adapters, and dynamic scaling attention achieves state-of-the-art monocular depth estimation on NYU Depth v2, KITTI, and MatterPort3D with reduced error and fast inference.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.01479","ref_index":102,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"UniRecGen: Unifying Multi-View 3D Reconstruction and Generation","primary_cat":"cs.CV","submitted_at":"2026-04-01T23:35:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"UniRecGen unifies reconstruction and generation via shared canonical space and disentangled cooperative learning to produce complete, consistent 3D models from sparse views.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2503.20020","ref_index":3,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Gemini Robotics: Bringing AI into the Physical World","primary_cat":"cs.RO","submitted_at":"2025-03-25T19:02:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Gemini Robotics is a Vision-Language-Action model for robot control that handles complex tasks robustly and adapts with minimal data, supported by an embodied reasoning extension.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2409.02813","ref_index":66,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MMMU-Pro: A More Robust Multi-discipline Multimodal Understanding Benchmark","primary_cat":"cs.CL","submitted_at":"2024-09-04T15:31:26+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":8.0,"formal_verification":"none","one_line_summary":"MMMU-Pro is a stricter multimodal benchmark that removes text-only solvable questions, augments options, and requires reading text from images, yielding substantially lower model scores of 16.8-26.9%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2305.10355","ref_index":37,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Evaluating Object Hallucination in Large Vision-Language Models","primary_cat":"cs.CV","submitted_at":"2023-05-17T16:34:01+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Large vision-language models exhibit severe object hallucination that varies with training instructions, and the proposed POPE polling method evaluates it more stably and flexibly than prior approaches.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}