"""SHARP Gradio demo (minimal, responsive UI). This Space: - Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image. - Exports a canonical `.ply` file for download. - Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only). Precompiled examples Place precompiled examples under `assets/examples/`. Recommended structure (matching stem): assets/examples/.jpg|png|webp assets/examples/.mp4 assets/examples/.ply Optional manifest (assets/examples/manifest.json): [ {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"}, ... ] """ from __future__ import annotations import json from dataclasses import dataclass from pathlib import Path from typing import Final import gradio as gr from model_utils import TrajectoryType, predict_and_maybe_render_gpu # ----------------------------------------------------------------------------- # Paths & constants # ----------------------------------------------------------------------------- APP_DIR: Final[Path] = Path(__file__).resolve().parent OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs" ASSETS_DIR: Final[Path] = APP_DIR / "assets" EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples" IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp") DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32 THEME: Final = gr.themes.Soft( primary_hue="indigo", secondary_hue="blue", neutral_hue="slate", ) CSS: Final[str] = """ /* Keep layout stable when scrollbars appear/disappear */ html { scrollbar-gutter: stable; } /* Use normal document flow (no fixed-height viewport shell) */ html, body { height: auto; } body { overflow: auto; } /* Comfortable max width; still fills small screens */ .gradio-container { max-width: 1400px; margin: 0 auto; padding: 0.75rem 1rem 1rem; box-sizing: border-box; } /* Make media components responsive without stretching */ #run-image, #run-video, #examples-image, #examples-video { width: 100%; } /* Keep aspect ratio and prevent runaway vertical growth on tall viewports */ #run-image img, #examples-image img { width: 100%; height: auto; max-height: 70vh; object-fit: contain; } #run-video video, #examples-video video { width: 100%; height: auto; max-height: 70vh; object-fit: contain; } /* On very small screens, reduce max media height a bit */ @media (max-width: 640px) { #run-image img, #examples-image img, #run-video video, #examples-video video { max-height: 55vh; } } /* Reduce extra whitespace in markdown blocks */ .gr-markdown > :first-child { margin-top: 0 !important; } .gr-markdown > :last-child { margin-bottom: 0 !important; } """ # ----------------------------------------------------------------------------- # Helpers # ----------------------------------------------------------------------------- def _ensure_dir(path: Path) -> Path: path.mkdir(parents=True, exist_ok=True) return path @dataclass(frozen=True, slots=True) class ExampleSpec: """A precompiled example bundle (image + optional mp4 + optional ply).""" label: str image: Path video: Path | None ply: Path | None def _normalize_key(path: str) -> str: """Normalize a path-like string for stable dictionary keys.""" try: return str(Path(path).resolve()) except Exception: return path def _load_manifest(manifest_path: Path) -> list[dict]: """Load manifest.json if present; return an empty list on errors.""" try: data = json.loads(manifest_path.read_text(encoding="utf-8")) if not isinstance(data, list): raise ValueError("manifest.json must contain a JSON list.") return [x for x in data if isinstance(x, dict)] except FileNotFoundError: return [] except Exception as e: # Manifest errors should not crash the app. print(f"[examples] Failed to parse manifest.json: {type(e).__name__}: {e}") return [] def discover_examples(examples_dir: Path) -> list[ExampleSpec]: """Discover example bundles under assets/examples/.""" _ensure_dir(examples_dir) manifest_rows = _load_manifest(examples_dir / "manifest.json") if manifest_rows: specs: list[ExampleSpec] = [] for row in manifest_rows: label = str(row.get("label") or "Example").strip() or "Example" image_rel = row.get("image") if not image_rel: continue image = (examples_dir / str(image_rel)).resolve() if not image.exists(): continue video = None ply = None if row.get("video"): v = (examples_dir / str(row["video"])).resolve() if v.exists(): video = v if row.get("ply"): p = (examples_dir / str(row["ply"])).resolve() if p.exists(): ply = p specs.append(ExampleSpec(label=label, image=image, video=video, ply=ply)) return specs # Fallback: infer bundles by filename stem images: list[Path] = [] for ext in IMAGE_EXTS: images.extend(sorted(examples_dir.glob(f"*{ext}"))) specs = [] for img in images: stem = img.stem video = examples_dir / f"{stem}.mp4" ply = examples_dir / f"{stem}.ply" specs.append( ExampleSpec( label=stem.replace("_", " ").strip() or stem, image=img.resolve(), video=video.resolve() if video.exists() else None, ply=ply.resolve() if ply.exists() else None, ) ) return specs _ensure_dir(OUTPUTS_DIR) EXAMPLE_SPECS: Final[list[ExampleSpec]] = discover_examples(EXAMPLES_DIR) EXAMPLE_INDEX_BY_PATH: Final[dict[str, ExampleSpec]] = { _normalize_key(str(s.image)): s for s in EXAMPLE_SPECS } EXAMPLE_INDEX_BY_NAME: Final[dict[str, ExampleSpec]] = { s.image.name: s for s in EXAMPLE_SPECS } def load_example_assets( image_path: str | None, ) -> tuple[str | None, str | None, str | None, str]: """Return (image, video, ply_path, status) for the selected example image.""" if not image_path: return None, None, None, "No example selected." spec = EXAMPLE_INDEX_BY_PATH.get(_normalize_key(image_path)) if spec is None: spec = EXAMPLE_INDEX_BY_NAME.get(Path(image_path).name) if spec is None: return image_path, None, None, "No matching example bundle found." video = str(spec.video) if spec.video is not None else None ply_path = str(spec.ply) if spec.ply is not None else None missing: list[str] = [] if video is None: missing.append("MP4") if ply_path is None: missing.append("PLY") msg = f"Loaded example: **{spec.label}**." if missing: msg += f" Missing: {', '.join(missing)}." return str(spec.image), video, ply_path, msg def _validate_image(image_path: str | None) -> None: if not image_path: raise gr.Error("Upload an image first.") def run_sharp( image_path: str | None, trajectory_type: TrajectoryType, output_long_side: int, num_frames: int, fps: int, render_video: bool, ) -> tuple[str | None, str | None, str]: """Run SHARP inference and return (video_path, ply_path, status_markdown).""" _validate_image(image_path) out_long_side: int | None = ( None if int(output_long_side) <= 0 else int(output_long_side) ) try: video_path, ply_path = predict_and_maybe_render_gpu( image_path, trajectory_type=trajectory_type, num_frames=int(num_frames), fps=int(fps), output_long_side=out_long_side, render_video=bool(render_video), ) lines: list[str] = [f"**PLY:** `{ply_path.name}` (ready to download)"] if render_video: if video_path is None: lines.append("**Video:** not rendered (CUDA unavailable).") else: lines.append(f"**Video:** `{video_path.name}`") else: lines.append("**Video:** disabled.") return ( str(video_path) if video_path is not None else None, str(ply_path), "\n".join(lines), ) except gr.Error: raise except Exception as e: raise gr.Error(f"SHARP failed: {type(e).__name__}: {e}") from e # ----------------------------------------------------------------------------- # UI # ----------------------------------------------------------------------------- def build_demo() -> gr.Blocks: with gr.Blocks( title="SHARP • Single-Image 3D Gaussian Prediction", elem_id="sharp-root", fill_height=True, ) as demo: gr.Markdown("## SHARP\nSingle-image **3D Gaussian scene** prediction.") # Run tab components are referenced by Examples tab, so keep them in outer scope. with gr.Column(elem_id="tabs-shell"): with gr.Tabs(): with gr.Tab("Run", id="run"): with gr.Column(elem_id="run-panel"): with gr.Row(equal_height=True, elem_id="run-media-row"): with gr.Column( scale=5, min_width=360, elem_id="run-left-col" ): image_in = gr.Image( label="Input image", type="filepath", sources=["upload"], elem_id="run-image", ) with gr.Row(): trajectory = gr.Dropdown( label="Trajectory", choices=[ "swipe", "shake", "rotate", "rotate_forward", ], value="rotate_forward", ) output_res = gr.Dropdown( label="Output long side", info="0 = match input", choices=[ ("Match input", 0), ("512", 512), ("768", 768), ("1024", 1024), ("1280", 1280), ("1536", 1536), ], value=0, ) with gr.Row(): frames = gr.Slider( label="Frames", minimum=24, maximum=120, step=1, value=60, ) fps_in = gr.Slider( label="FPS", minimum=8, maximum=60, step=1, value=30, ) render_toggle = gr.Checkbox( label="Render MP4 (CUDA / ZeroGPU only)", value=True, ) with gr.Column( scale=5, min_width=360, elem_id="run-right-col" ): video_out = gr.Video( label="Trajectory video (MP4)", elem_id="run-video", ) with gr.Row(elem_id="run-download-row"): ply_download = gr.DownloadButton( label="Download PLY (.ply)", value=None, visible=True, elem_id="run-ply-download", ) status_md = gr.Markdown("", elem_id="run-status") with gr.Row(elem_id="run-actions-row"): run_btn = gr.Button("Generate", variant="primary") clear_btn = gr.ClearButton( [image_in, video_out, ply_download, status_md], value="Clear", ) # Ensure clearing also clears any previous download target. clear_btn.click( fn=lambda: None, outputs=[ply_download], queue=False, ) run_btn.click( fn=run_sharp, inputs=[ image_in, trajectory, output_res, frames, fps_in, render_toggle, ], outputs=[video_out, ply_download, status_md], api_visibility="public", ) with gr.Tab("Examples", id="examples"): with gr.Column(elem_id="examples-panel"): if EXAMPLE_SPECS: gr.Markdown( "Click an example to preview precompiled outputs. " "The example image will also be loaded into the Run tab." ) # Define preview outputs first (unrendered), so we can reference them from gr.Examples. ex_img = gr.Image( label="Example image", type="filepath", interactive=False, render=False, height=360, elem_id="examples-image", ) ex_vid = gr.Video( label="Pre-rendered MP4", render=False, height=360, elem_id="examples-video", ) ex_ply = gr.DownloadButton( label="Download PLY (.ply)", value=None, visible=True, render=False, elem_id="examples-ply-download", ) ex_status = gr.Markdown( render=False, elem_id="examples-status" ) with gr.Row(equal_height=True): with gr.Column(scale=4, min_width=320): gr.Examples( examples=[ [str(s.image)] for s in EXAMPLE_SPECS ], example_labels=[s.label for s in EXAMPLE_SPECS], inputs=[image_in], outputs=[ex_img, ex_vid, ex_ply, ex_status], fn=load_example_assets, cache_examples=False, run_on_click=True, examples_per_page=10, label=None, ) with gr.Column(scale=6, min_width=360): ex_img.render() ex_vid.render() ex_ply.render() ex_status.render() gr.Markdown( "Add example bundles under `assets/examples/` " "(image + mp4 + ply) or provide a `manifest.json`." ) else: gr.Markdown( "No precompiled examples found.\n\n" "Add files under `assets/examples/`:\n" "- `example.jpg` (or png/webp)\n" "- `example.mp4`\n" "- `example.ply`\n\n" "Optionally add `assets/examples/manifest.json` to define labels and filenames." ) with gr.Tab("About", id="about"): with gr.Column(elem_id="about-panel"): gr.Markdown( """ *Sharp Monocular View Synthesis in Less Than a Second* (Apple, 2025) ```bibtex @inproceedings{Sharp2025:arxiv, title = {Sharp Monocular View Synthesis in Less Than a Second}, author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Ama\\"{e}l Delaunoyand Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun}, journal = {arXiv preprint arXiv:2512.10685}, year = {2025}, url = {https://arxiv.org/abs/2512.10685}, } ``` """.strip() ) demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1) return demo demo = build_demo() if __name__ == "__main__": demo.launch(theme=THEME, css=CSS)