{"total":19,"items":[{"citing_arxiv_id":"2605.13838","ref_index":132,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"R-DMesh: Video-Guided 3D Animation via Rectified Dynamic Mesh Flow","primary_cat":"cs.CV","submitted_at":"2026-05-13T17:58:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"R-DMesh uses a VAE with a learned rectification jump offset and Triflow Attention inside a rectified-flow diffusion transformer to produce video-aligned 4D meshes despite initial pose misalignment.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09620","ref_index":25,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MiXR: Harvesting and Recomposing Geometry from Real-World Objects for In-Situ 3D Design","primary_cat":"cs.HC","submitted_at":"2026-05-10T16:06:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MiXR enables in-situ 3D design by harvesting real-world geometry for user-defined compositions that generative AI then refines, outperforming text-only generative methods in control and fidelity per a 12-person study.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09606","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"On the Generation and Mitigation of Harmful Geometry in Image-to-3D Models","primary_cat":"cs.CR","submitted_at":"2026-05-10T15:35:42+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"Image-to-3D models successfully generate harmful geometries in most cases with under 0.3% caught by commercial filters; existing safeguards are weak but a stacked defense cuts harmful outputs to under 1% at 11% false-positive cost.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08744","ref_index":40,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MeshFIM: Local Low-Poly Mesh Editing via Fill-in-the-Middle Autoregressive Generation","primary_cat":"cs.GR","submitted_at":"2026-05-09T07:11:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MeshFIM enables local low-poly mesh editing by autoregressively filling target regions conditioned on context, using boundary markers, positional embeddings, and a gated geometry encoder to enforce attachment, topology, and region limits.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04524","ref_index":158,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"High-Fidelity Single-Image Head Modeling with Industry-Grade Topology","primary_cat":"cs.CV","submitted_at":"2026-05-06T06:07:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A single-image head reconstruction method uses coarse-to-fine optimization with normal consistency, landmarks, and geometry-aware constraints on curvature and conformality to produce meshes with industry-grade topology and preserved facial identity.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04412","ref_index":41,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Structured 3D Latents Are Surprisingly Powerful: Unleashing Generalizable Style with 2D Diffusion","primary_cat":"cs.CV","submitted_at":"2026-05-06T02:08:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DiLAST optimizes 3D latents via guidance from a 2D diffusion model to enable generalizable style transfer for OOD styles in 3D asset generation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04035","ref_index":24,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Large-Scale High-Quality 3D Gaussian Head Reconstruction from Multi-View Captures","primary_cat":"cs.CV","submitted_at":"2026-05-05T17:55:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"HeadsUp maps multi-view captures to UV-parameterized 3D Gaussians on a template via an encoder-decoder, achieving state-of-the-art quality and generalization after training on more than 10,000 subjects.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00345","ref_index":14,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Pose-Aware Diffusion for 3D Generation","primary_cat":"cs.CV","submitted_at":"2026-05-01T02:05:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"PAD synthesizes 3D geometry in observation space via depth unprojection as anchor to eliminate pose ambiguity in image-to-3D generation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26917","ref_index":4,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"AnimateAnyMesh++: A Flexible 4D Foundation Model for High-Fidelity Text-Driven Mesh Animation","primary_cat":"cs.CV","submitted_at":"2026-04-29T17:27:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"AnimateAnyMesh++ animates arbitrary 3D meshes from text using an expanded 300K-identity DyMesh-XL dataset, a power-law topology-aware DyMeshVAE-Flex, and a variable-length rectified-flow generator to produce semantically accurate, temporally coherent animations in seconds.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19257","ref_index":11,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Unposed-to-3D: Learning Simulation-Ready Vehicles from Real-World Images","primary_cat":"cs.CV","submitted_at":"2026-04-21T09:20:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Unposed-to-3D learns simulation-ready 3D vehicle models from unposed real images by predicting camera parameters for photometric self-supervision, then adding scale prediction and harmonization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18747","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"URoPE: Universal Relative Position Embedding across Geometric Spaces","primary_cat":"cs.CV","submitted_at":"2026-04-20T18:52:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"URoPE is a parameter-free relative position embedding for transformers that works across arbitrary geometric spaces by ray sampling and projection, yielding consistent gains on novel view synthesis, 3D detection, tracking, and depth estimation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17472","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"UniMesh: Unifying 3D Mesh Understanding and Generation","primary_cat":"cs.CV","submitted_at":"2026-04-19T14:53:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"UniMesh unifies 3D mesh generation and understanding in one model via a Mesh Head interface, Chain of Mesh iterative editing, and an Actor-Evaluator self-reflection loop.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16299","ref_index":30,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Repurposing 3D Generative Model for Autoregressive Layout Generation","primary_cat":"cs.CV","submitted_at":"2026-04-17T17:59:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LaviGen turns 3D generative models into an autoregressive layout generator that models geometric and physical constraints, delivering 19% higher physical plausibility and 65% faster inference on the LayoutVLM benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15239","ref_index":17,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"TokenGS: Decoupling 3D Gaussian Prediction from Pixels with Learnable Tokens","primary_cat":"cs.CV","submitted_at":"2026-04-16T17:12:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"TokenGS uses learnable Gaussian tokens in an encoder-decoder architecture to regress 3D means directly, achieving SOTA feed-forward reconstruction on static and dynamic scenes with better robustness.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12309","ref_index":11,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Towards Realistic and Consistent Orbital Video Generation via 3D Foundation Priors","primary_cat":"cs.CV","submitted_at":"2026-04-14T05:35:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A video generation approach conditions a base model with multi-scale 3D latent features and a cross-attention adapter to produce geometrically realistic and consistent orbital videos from one image.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.10259","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Real-Time Human Reconstruction and Animation using Feed-Forward Gaussian Splatting","primary_cat":"cs.CV","submitted_at":"2026-04-11T15:52:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A feed-forward network predicts per-SMPL-X-vertex 3D Gaussians in canonical space from multi-view RGB images, enabling single-pass reconstruction and real-time animation via linear blend skinning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.07053","ref_index":14,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"AnchorSplat: Feed-Forward 3D Gaussian Splatting with 3D Geometric Priors","primary_cat":"cs.CV","submitted_at":"2026-04-08T13:04:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AnchorSplat uses anchor-aligned 3D Gaussians guided by geometric priors for feed-forward scene reconstruction, achieving SOTA novel view synthesis on ScanNet++ with fewer primitives and better view consistency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.06773","ref_index":33,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MemoryDiorama: Generating Dynamic 3D Diorama from Everyday Photos for Memory Recall","primary_cat":"cs.HC","submitted_at":"2026-04-08T07:41:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MemoryDiorama generates animated 3D dioramas from photos via LLM scene analysis and generative components, yielding richer autobiographical recall than photo-only or static diorama baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.05182","ref_index":22,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"LSRM: High-Fidelity Object-Centric Reconstruction via Scaled Context Windows","primary_cat":"cs.CV","submitted_at":"2026-04-06T21:21:12+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LSRM scales transformer context windows with native sparse attention and geometric routing to deliver high-fidelity feed-forward 3D reconstruction and inverse rendering that approaches dense optimization quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}