""" SAM 3D Body Gradio App - ZeroGPU Compatible This app handles all dependencies and provides a user-friendly interface for 3D body estimation. Optimized for Hugging Face Spaces with ZeroGPU support. """ import os import sys import subprocess import importlib.util def check_and_install_package(package_name, import_name=None, pip_name=None): """Check if a package is installed, if not, install it.""" if import_name is None: import_name = package_name if pip_name is None: pip_name = package_name spec = importlib.util.find_spec(import_name) if spec is None: print(f"Installing {package_name}...") subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name, "-q"]) print(f"✓ {package_name} installed successfully") return True # Install core dependencies print("Checking and installing dependencies...") check_and_install_package("gradio") check_and_install_package("spaces") # ZeroGPU support check_and_install_package("torch", pip_name="torch torchvision torchaudio") check_and_install_package("pytorch_lightning", "pytorch_lightning") check_and_install_package("cv2", "cv2", "opencv-python") check_and_install_package("numpy") check_and_install_package("PIL", "PIL", "Pillow") check_and_install_package("huggingface_hub") # Install additional dependencies additional_deps = [ "pyrender", "yacs", "scikit-image", "einops", "timm", "dill", "pandas", "rich", "hydra-core", "pyrootutils", "webdataset", "networkx==3.2.1", "roma", "joblib", "seaborn", "loguru", "pycocotools", "fvcore" ] for dep in additional_deps: try: pkg_name = dep.split("==")[0].replace("-", "_") check_and_install_package(pkg_name, pip_name=dep) except: pass print("Core dependencies installed!") import gradio as gr import cv2 import numpy as np from PIL import Image import torch import spaces # ZeroGPU decorator from huggingface_hub import hf_hub_download, login import warnings warnings.filterwarnings('ignore') class SAM3DBodyEstimator: """Wrapper class for SAM 3D Body estimation with ZeroGPU support.""" def __init__(self, hf_repo_id="facebook/sam-3d-body-dinov3"): self.hf_repo_id = hf_repo_id self.model = None self.faces = None self.initialized = False def setup(self, hf_token=None): """Setup the SAM 3D Body model (CPU operations only).""" try: if hf_token: login(token=hf_token) print("✓ Logged in to Hugging Face") # Try to import the SAM 3D Body utilities try: from notebook.utils import setup_sam_3d_body # Initialize model on CPU first, will move to GPU during inference self.model = setup_sam_3d_body(hf_repo_id=self.hf_repo_id) self.faces = self.model.faces self.initialized = True return "✓ Model loaded successfully! Ready for GPU inference." except ImportError: return "⚠️ SAM 3D Body package not found. Please install manually or provide installation path." except Exception as e: return f"❌ Error loading model: {str(e)}\n\nPlease ensure you have access to the Hugging Face repo and are authenticated." except Exception as e: return f"❌ Setup error: {str(e)}" @spaces.GPU(duration=120) # ZeroGPU decorator with 120s timeout def process_image(self, image): """Process an image and return 3D body estimation (GPU accelerated).""" if not self.initialized: return None, "❌ Model not initialized. Please setup first with your HF token." try: # Ensure model is on GPU if hasattr(self.model, 'to'): self.model.to('cuda') # Convert PIL to BGR img_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) # Process image (GPU operations happen here) img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) outputs = self.model.process_one_image(img_rgb) # Visualize results try: from tools.vis_utils import visualize_sample_together rend_img = visualize_sample_together(img_bgr, outputs, self.faces) result_img = Image.fromarray(cv2.cvtColor(rend_img.astype(np.uint8), cv2.COLOR_BGR2RGB)) # GPU is automatically released after this function completes return result_img, "✓ Processing completed successfully!" except ImportError: # Fallback visualization if vis_utils not available return image, "⚠️ Visualization utilities not found. Model processed but cannot render 3D output." except Exception as e: return None, f"❌ Processing error: {str(e)}" finally: # Clean up GPU memory if torch.cuda.is_available(): torch.cuda.empty_cache() # Initialize estimator estimator = SAM3DBodyEstimator() def setup_model(hf_token, model_choice): """Setup the SAM 3D Body model with HF token.""" repo_ids = { "DINOv3 (Recommended)": "facebook/sam-3d-body-dinov3", "ViT-H": "facebook/sam-3d-body-vith" } estimator.hf_repo_id = repo_ids[model_choice] return estimator.setup(hf_token) def process_uploaded_image(image): """Process uploaded image through SAM 3D Body (GPU allocated dynamically).""" if image is None: return None, "❌ Please upload an image first." return estimator.process_image(image) # Create Gradio interface with gr.Blocks(title="SAM 3D Body Estimator", theme=gr.themes.Soft()) as demo: gr.Markdown(""" # 🎯 SAM 3D Body Estimator (ZeroGPU) Generate 3D body meshes from single images using Meta's SAM 3D Body model. **Powered by Hugging Face Spaces ZeroGPU** - Dynamic GPU allocation for efficient inference! ### 📋 Setup Instructions: 1. Get access to the model on [Hugging Face](https://huggingface.co/facebook/sam-3d-body-dinov3) 2. Create a [Hugging Face token](https://huggingface.co/settings/tokens) with read access 3. Enter your token below and click "Initialize Model" 4. Upload an image and click "Process Image" ⚠️ **Note**: You need approved access to the SAM 3D Body repos on Hugging Face. ### ⚡ ZeroGPU Features: - **Dynamic GPU Allocation**: H200 GPU allocated only during inference - **Free GPU Access**: Available to all users with daily quotas - **PRO Benefits**: PRO users get 7x more quota (25 min/day vs 3.5 min/day) """) with gr.Row(): with gr.Column(scale=1): gr.Markdown("### 🔧 Model Setup") hf_token_input = gr.Textbox( label="Hugging Face Token", placeholder="hf_...", type="password", info="Your HF token with read access" ) model_choice = gr.Radio( choices=["DINOv3 (Recommended)", "ViT-H"], value="DINOv3 (Recommended)", label="Model Selection" ) setup_btn = gr.Button("🚀 Initialize Model", variant="primary") setup_status = gr.Textbox(label="Setup Status", interactive=False) gr.Markdown("### 📸 Upload Image") input_image = gr.Image( label="Input Image", type="pil", sources=["upload", "webcam"] ) process_btn = gr.Button("▶️ Process Image (GPU)", variant="primary") process_status = gr.Textbox(label="Processing Status", interactive=False) with gr.Column(scale=1): gr.Markdown("### 🎨 Results") output_image = gr.Image(label="3D Body Estimation", type="pil") gr.Markdown(""" ### 💡 Tips: - Use clear, full-body images for best results - Ensure good lighting and minimal occlusion - Person should be facing the camera - High resolution images work better - Processing time: ~30-60 seconds per image ### 📊 GPU Usage: - **Duration**: Up to 120 seconds per inference - **VRAM**: 70GB H200 GPU available - **Queue**: Priority based on account tier """) gr.Markdown(""" --- ### 📚 Additional Information **Model Details:** - Paper: [SAM 3D Body](https://arxiv.org/abs/your-paper-link) - GitHub: [facebook/sam-3d-body](https://github.com/facebookresearch/sam-3d-body) **ZeroGPU Daily Quotas:** - Unauthenticated: 2 minutes - Free account: 3.5 minutes - PRO account: 25 minutes (7x more!) - Enterprise: 45 minutes **System Requirements:** - Python 3.10.13+ - PyTorch 2.1.0+ - Gradio 4+ - ZeroGPU Space (H200 GPU) **Troubleshooting:** - If model fails to load, ensure you have access to the HF repo - GPU allocation is dynamic - wait for your turn in queue - Check your daily quota if processing fails - Clear browser cache if interface doesn't load properly **About ZeroGPU:** This Space uses ZeroGPU, which dynamically allocates NVIDIA H200 GPUs only during inference. This maximizes efficiency and allows free GPU access for AI demos! """) # Event handlers setup_btn.click( fn=setup_model, inputs=[hf_token_input, model_choice], outputs=setup_status ) process_btn.click( fn=process_uploaded_image, inputs=input_image, outputs=[output_image, process_status] ) # Launch the app if __name__ == "__main__": print("\n" + "="*60) print("🚀 Starting SAM 3D Body Gradio App (ZeroGPU)") print("="*60 + "\n") demo.launch( server_name="0.0.0.0", server_port=7860, share=False, show_error=True )