{"total":14,"items":[{"citing_arxiv_id":"2605.13838","ref_index":129,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"R-DMesh: Video-Guided 3D Animation via Rectified Dynamic Mesh Flow","primary_cat":"cs.CV","submitted_at":"2026-05-13T17:58:13+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12399","ref_index":43,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"GeoQuery: Geometry-Query Diffusion for Sparse-View Reconstruction","primary_cat":"cs.CV","submitted_at":"2026-05-12T17:00:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GeoQuery replaces corrupted rendering features with geometry-aligned proxy queries and restricts cross-view attention to local windows, enabling robust diffusion-based refinement under extreme view sparsity.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05163","ref_index":16,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"PhysForge: Generating Physics-Grounded 3D Assets for Interactive Virtual World","primary_cat":"cs.CV","submitted_at":"2026-05-06T17:33:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PhysForge generates physics-grounded 3D assets via a VLM-planned Hierarchical Physical Blueprint and a KineVoxel Injection diffusion model, backed by the new PhysDB dataset of 150,000 annotated assets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02583","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Stylistic Attribute Control in Latent Diffusion Models","primary_cat":"cs.CV","submitted_at":"2026-05-04T13:34:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A technique for parametric stylistic control in latent diffusion models learns disentangled directions from synthetic datasets and applies them via guidance composition while preserving semantics.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27422","ref_index":50,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Sparse-View 3D Gaussian Splatting in the Wild","primary_cat":"cs.CV","submitted_at":"2026-04-30T04:53:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A new sparse-view 3D Gaussian splatting method for unconstrained scenes with distractors combines diffusion-based reference-guided refinement and sparsity-aware Gaussian replication to achieve better rendering quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26917","ref_index":38,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"AnimateAnyMesh++: A Flexible 4D Foundation Model for High-Fidelity Text-Driven Mesh Animation","primary_cat":"cs.CV","submitted_at":"2026-04-29T17:27:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"AnimateAnyMesh++ animates arbitrary 3D meshes from text using an expanded 300K-identity DyMesh-XL dataset, a power-law topology-aware DyMeshVAE-Flex, and a variable-length rectified-flow generator to produce semantically accurate, temporally coherent animations in seconds.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19954","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens","primary_cat":"cs.CV","submitted_at":"2026-04-21T20:01:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Viewpoint tokens learned on a mixed 3D-rendered and photorealistic dataset enable precise camera control in text-to-image generation while factorizing geometry from appearance and transferring to unseen object categories.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17565","ref_index":61,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"UniGeo: Unifying Geometric Guidance for Camera-Controllable Image Editing via Video Models","primary_cat":"cs.CV","submitted_at":"2026-04-19T18:11:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"UniGeo unifies geometric guidance across three levels in video models to reduce geometric drift and improve consistency in camera-controllable image editing.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.14302","ref_index":40,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Geometrically Consistent Multi-View Scene Generation from Freehand Sketches","primary_cat":"cs.CV","submitted_at":"2026-04-15T18:00:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A framework generates consistent multi-view scenes from one freehand sketch via a ~9k-sample dataset, Parallel Camera-Aware Attention Adapters, and Sparse Correspondence Supervision Loss, outperforming baselines in realism and consistency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.13856","ref_index":36,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Any3DAvatar: Fast and High-Quality Full-Head 3D Avatar Reconstruction from Single Portrait Image","primary_cat":"cs.CV","submitted_at":"2026-04-15T13:24:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Any3DAvatar reconstructs full-head 3D Gaussian avatars from one image via one-step denoising on a Plücker-aware scaffold plus auxiliary view supervision, beating prior single-image methods on fidelity while running substantially faster.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08760","ref_index":28,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SIC3D: Style Image Conditioned Text-to-3D Gaussian Splatting Generation","primary_cat":"cs.CV","submitted_at":"2026-04-09T20:50:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SIC3D generates text-to-3D objects with Gaussian splatting then stylizes them using Variational Stylized Score Distillation loss plus scaling regularization to improve style match and geometry fidelity.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08500","ref_index":34,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Novel View Synthesis as Video Completion","primary_cat":"cs.CV","submitted_at":"2026-04-09T17:44:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Video diffusion models can be adapted into permutation-invariant generators for sparse novel view synthesis by treating the problem as video completion and removing temporal order cues.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2405.05941","ref_index":61,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Evaluating Real-World Robot Manipulation Policies in Simulation","primary_cat":"cs.RO","submitted_at":"2024-05-09T17:30:16+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SIMPLER simulated environments yield policy performance that correlates strongly with real-world robot manipulation results and captures similar sensitivity to distribution shifts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2404.07191","ref_index":41,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models","primary_cat":"cs.CV","submitted_at":"2024-04-10T17:48:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"InstantMesh produces diverse, high-quality 3D meshes from single images in seconds by combining a multi-view diffusion model with a sparse-view large reconstruction model and optimizing directly on meshes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}