seungminkwak commited on
Commit
08b23ce
·
0 Parent(s):

reset: clean history (purge leaked token)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +36 -0
  2. .gitignore +12 -0
  3. Dockerfile +85 -0
  4. README.md +12 -0
  5. app.py +344 -0
  6. app_backup.py +52 -0
  7. app_backup_encoding.py +52 -0
  8. requirements.txt +27 -0
  9. samples/demo.mp4 +3 -0
  10. test_puppeteer_api_v2.ps1 +57 -0
  11. third_party/Puppeteer/.gitmodules +15 -0
  12. third_party/Puppeteer/LICENSE +201 -0
  13. third_party/Puppeteer/README.md +105 -0
  14. third_party/Puppeteer/animation/README.md +76 -0
  15. third_party/Puppeteer/animation/demo.sh +7 -0
  16. third_party/Puppeteer/animation/download.py +13 -0
  17. third_party/Puppeteer/animation/model.py +199 -0
  18. third_party/Puppeteer/animation/optimization.py +626 -0
  19. third_party/Puppeteer/animation/renderer.py +348 -0
  20. third_party/Puppeteer/animation/utils/cameras/back.json +1 -0
  21. third_party/Puppeteer/animation/utils/cameras/back_left.json +64 -0
  22. third_party/Puppeteer/animation/utils/cameras/back_right.json +64 -0
  23. third_party/Puppeteer/animation/utils/cameras/front.json +1 -0
  24. third_party/Puppeteer/animation/utils/cameras/front_left.json +64 -0
  25. third_party/Puppeteer/animation/utils/cameras/front_right.json +64 -0
  26. third_party/Puppeteer/animation/utils/cameras/left.json +1 -0
  27. third_party/Puppeteer/animation/utils/cameras/right.json +1 -0
  28. third_party/Puppeteer/animation/utils/data_loader.py +170 -0
  29. third_party/Puppeteer/animation/utils/loss_utils.py +420 -0
  30. third_party/Puppeteer/animation/utils/misc.py +34 -0
  31. third_party/Puppeteer/animation/utils/quat_utils.py +179 -0
  32. third_party/Puppeteer/animation/utils/render_first_frame.py +93 -0
  33. third_party/Puppeteer/animation/utils/save_flow.py +297 -0
  34. third_party/Puppeteer/animation/utils/save_utils.py +374 -0
  35. third_party/Puppeteer/checkpoints/rig.ckpt +3 -0
  36. third_party/Puppeteer/demo_animation.sh +63 -0
  37. third_party/Puppeteer/demo_rigging.sh +117 -0
  38. third_party/Puppeteer/requirements.txt +29 -0
  39. third_party/Puppeteer/skeleton/README.md +93 -0
  40. third_party/Puppeteer/skeleton/data_utils/README.md +43 -0
  41. third_party/Puppeteer/skeleton/data_utils/convert_npz_to_mesh_rig.py +107 -0
  42. third_party/Puppeteer/skeleton/data_utils/data_loader.py +122 -0
  43. third_party/Puppeteer/skeleton/data_utils/pyrender_wrapper.py +144 -0
  44. third_party/Puppeteer/skeleton/data_utils/read_npz.py +43 -0
  45. third_party/Puppeteer/skeleton/data_utils/read_rig_mesh_from_glb.py +198 -0
  46. third_party/Puppeteer/skeleton/data_utils/render_data.py +61 -0
  47. third_party/Puppeteer/skeleton/data_utils/save_npz.py +256 -0
  48. third_party/Puppeteer/skeleton/demo.py +219 -0
  49. third_party/Puppeteer/skeleton/demo.sh +19 -0
  50. third_party/Puppeteer/skeleton/download.py +25 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # local test scripts
3
+ *test_puppeteer_api*.ps1
4
+ logs/
5
+ results/
6
+
7
+
8
+ # local test scripts
9
+ *test_puppeteer_api*.ps1
10
+ logs/
11
+ results/
12
+
Dockerfile ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================
2
+ # Puppeteer GPU API Dockerfile (Final, CUDA 11.8 + A10G Ready)
3
+ # ============================================================
4
+
5
+ FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
6
+
7
+ # ------------------------------------------------------------
8
+ # OS Dependencies
9
+ # ------------------------------------------------------------
10
+ RUN apt-get update && apt-get install -y \
11
+ python3 python3-pip python3-venv \
12
+ git wget curl unzip ffmpeg bash \
13
+ libgl1 libglib2.0-0 \
14
+ && rm -rf /var/lib/apt/lists/* \
15
+ && ln -sf /usr/bin/python3 /usr/bin/python
16
+
17
+ # ------------------------------------------------------------
18
+ # Environment Variables
19
+ # ------------------------------------------------------------
20
+ ENV PIP_NO_CACHE_DIR=1 \
21
+ PYTHONUNBUFFERED=1 \
22
+ # OMP 경고 제거 및 단일 스레드 고정 (libgomp 에러 회피)
23
+ OMP_NUM_THREADS=1 \
24
+ MKL_THREADING_LAYER=SEQUENTIAL \
25
+ # 입출력 경로
26
+ TMP_IN_DIR=/data/in \
27
+ RESULT_DIR=/data/results
28
+
29
+ RUN python -m pip install --upgrade pip
30
+
31
+ # ------------------------------------------------------------
32
+ # Build Cache Busting (optional, force rebuild)
33
+ # ------------------------------------------------------------
34
+ ARG CACHE_BUST=2025-11-05-01-30
35
+
36
+ # ------------------------------------------------------------
37
+ # Work Directory
38
+ # ------------------------------------------------------------
39
+ WORKDIR /app
40
+
41
+ # ------------------------------------------------------------
42
+ # Python Dependencies (Torch 제외)
43
+ # ------------------------------------------------------------
44
+ COPY requirements.txt /app/requirements.txt
45
+ RUN pip install --no-cache-dir -r /app/requirements.txt
46
+
47
+ # ------------------------------------------------------------
48
+ # CUDA용 PyTorch 설치 (cu118)
49
+ # ------------------------------------------------------------
50
+ RUN pip uninstall -y torch torchvision torchaudio || true
51
+ RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu118 \
52
+ torch torchvision torchaudio
53
+
54
+ # ------------------------------------------------------------
55
+ # App Source
56
+ # ------------------------------------------------------------
57
+ COPY app.py /app/app.py
58
+
59
+ # ------------------------------------------------------------
60
+ # Puppeteer Vendor (벤더 코드)
61
+ # ------------------------------------------------------------
62
+ COPY third_party/Puppeteer /app/Puppeteer
63
+ RUN chmod +x /app/Puppeteer/demo_rigging.sh || true
64
+
65
+ # PYTHONPATH: app / Puppeteer / third_party
66
+ ENV PYTHONPATH=/app:/app/Puppeteer:/app/Puppeteer/third_party:$PYTHONPATH
67
+
68
+ # 일부 코드가 'third_partys' 를 import 하는 경우 대비
69
+ RUN ln -s /app/Puppeteer/third_party /app/third_partys || true \
70
+ && touch /app/Puppeteer/third_party/__init__.py
71
+
72
+ # ------------------------------------------------------------
73
+ # Writable Paths
74
+ # ------------------------------------------------------------
75
+ RUN mkdir -p /data/in /data/results && chmod -R 777 /data
76
+
77
+ # ------------------------------------------------------------
78
+ # ------------------------------------------------------------
79
+ # Entrypoint (cd /app 보장)
80
+ # ------------------------------------------------------------
81
+ RUN printf '#!/bin/bash\nset -euo pipefail\ncd /app\npython -c "import importlib, sys; import app; print(\\"[boot] app imported OK\\")" || exit 1\nuvicorn app:app --host 0.0.0.0 --port ${PORT:-7860}\n' > /app/run.sh \
82
+ && chmod +x /app/run.sh
83
+
84
+ EXPOSE 7860
85
+ CMD ["sh", "-c", "/app/run.sh"]
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Puppeteer Api
3
+ emoji: 🏆
4
+ colorFrom: yellow
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ short_description: puppeteer-api
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import sys
4
+ import shutil
5
+ import subprocess
6
+ from pathlib import Path
7
+ from typing import List, Optional
8
+ import importlib.util
9
+
10
+ import requests
11
+ from fastapi import FastAPI, HTTPException
12
+ from fastapi.responses import FileResponse
13
+ from pydantic import BaseModel, HttpUrl, Field
14
+
15
+ # ----------------------------------------------------------------------------- #
16
+ # 🔧 환경 고정: libgomp 경고/에러 회피 (invalid OMP_NUM_THREADS)
17
+ # ----------------------------------------------------------------------------- #
18
+ # 일부 컨테이너에서 OMP_NUM_THREADS가 비어있거나 잘못 들어가면 libgomp가 에러를 냅니다.
19
+ # 안전하게 정수값으로 강제 세팅합니다.
20
+ os.environ["OMP_NUM_THREADS"] = os.environ.get("OMP_NUM_THREADS", "4")
21
+ if not os.environ["OMP_NUM_THREADS"].isdigit():
22
+ os.environ["OMP_NUM_THREADS"] = "4"
23
+
24
+ # ----------------------------------------------------------------------------- #
25
+ # 🔧 런타임 의존성 자동 설치 (tqdm, einops, scipy, trimesh 등)
26
+ # - requirements/Dockerfile에 빠진 경우를 대비해, 서버 기동 시 한 번 체크해서 설치
27
+ # ----------------------------------------------------------------------------- #
28
+ RUNTIME_DEPS = [
29
+ "tqdm",
30
+ "einops",
31
+ "scipy",
32
+ "trimesh",
33
+ "accelerate", # 추가
34
+ "timm", # 추가
35
+ # 아래는 여유 패키지 (에러 나면 자동 보강)
36
+ "networkx",
37
+ "scikit-image",
38
+ ]
39
+
40
+ def _need_install(mod_name: str) -> bool:
41
+ return importlib.util.find_spec(mod_name) is None
42
+
43
+ def _pip_install(pkgs: List[str]) -> None:
44
+ if not pkgs:
45
+ return
46
+ try:
47
+ subprocess.check_call([sys.executable, "-m", "pip", "install", *pkgs])
48
+ except Exception as e:
49
+ print(f"[deps] pip install failed for {pkgs}: {e}")
50
+
51
+ def _ensure_runtime_deps() -> None:
52
+ # numpy 2.x면 scipy 등과 충돌 가능 → numpy<2로 내리는 시도
53
+ try:
54
+ import numpy as _np
55
+ if _np.__version__.startswith("2"):
56
+ print(f"[deps] numpy=={_np.__version__} detected; attempting to pin <2.0")
57
+ _pip_install(["numpy<2"])
58
+ except Exception as e:
59
+ print(f"[deps] numpy check failed: {e}")
60
+ # 필수 모듈 채우기
61
+ missing = [m for m in RUNTIME_DEPS if _need_install(m)]
62
+ if missing:
63
+ print(f"[deps] installing missing modules: {missing}")
64
+ _pip_install(missing)
65
+ # 최종 확인 로그
66
+ for m in RUNTIME_DEPS:
67
+ print(f"[deps] {m} -> {'OK' if not _need_install(m) else 'MISSING'}")
68
+
69
+ _ensure_runtime_deps()
70
+
71
+ # ----------------------------------------------------------------------------- #
72
+ # FastAPI 초기화
73
+ # ----------------------------------------------------------------------------- #
74
+ app = FastAPI(title="Puppeteer API", version="1.0.0")
75
+
76
+ # ----------------------------------------------------------------------------- #
77
+ # Settings
78
+ # ----------------------------------------------------------------------------- #
79
+ PUPPETEER_SRC = Path(os.environ.get("PUPPETEER_DIR", "/app/Puppeteer")) # 읽기 전용 원본
80
+ PUPPETEER_RUN = Path(os.environ.get("PUPPETEER_RUN", "/tmp/puppeteer_run")) # 실행용 복사본(쓰기 가능)
81
+ RESULT_DIR = Path(os.environ.get("RESULT_DIR", str(PUPPETEER_RUN / "results"))) # rig 결과 기본 경로
82
+ TMP_IN_DIR = Path(os.environ.get("TMP_IN_DIR", "/tmp/in")) # 입력 저장 경로
83
+ DOWNLOAD_TIMEOUT = int(os.environ.get("DOWNLOAD_TIMEOUT", "180"))
84
+ MAX_DOWNLOAD_MB = int(os.environ.get("MAX_DOWNLOAD_MB", "512"))
85
+ SAFE_NAME = re.compile(r"[^A-Za-z0-9._-]+")
86
+ # 애니메이션/리깅 결과를 폭넓게 찾기 위한 후보 경로
87
+ RESULT_BASES = [
88
+ Path("/app/Puppeteer/results"),
89
+ RESULT_DIR,
90
+ Path("/data/results"),
91
+ Path("/tmp/puppeteer_run/results"),
92
+ ]
93
+
94
+ # ----------------------------------------------------------------------------- #
95
+ # Auto-download checkpoints (런타임 시 자동 다운로드)
96
+ # ----------------------------------------------------------------------------- #
97
+ ckpt_path = Path("/app/Puppeteer/checkpoints")
98
+ if not ckpt_path.exists() or not any(ckpt_path.iterdir()):
99
+ try:
100
+ print("[init] checkpoints missing — trying runtime download via script...")
101
+ subprocess.run(
102
+ ["bash", "-lc", "cd /app/Puppeteer && ./scripts/download_ckpt.sh"],
103
+ check=True,
104
+ stdout=subprocess.PIPE,
105
+ stderr=subprocess.STDOUT,
106
+ text=True,
107
+ )
108
+ print("[init] Puppeteer checkpoints downloaded successfully via script.")
109
+ except Exception as e:
110
+ print("[init] WARNING: checkpoint script failed:", e)
111
+ try:
112
+ ckpt_path.mkdir(parents=True, exist_ok=True)
113
+ print("[init] trying manual download from GitHub release...")
114
+ subprocess.run(
115
+ [
116
+ "wget",
117
+ "-O",
118
+ "/app/Puppeteer/checkpoints/rig.ckpt",
119
+ "https://github.com/ByteDance-Seed/Puppeteer/releases/download/v1.0.0/rig.ckpt",
120
+ ],
121
+ check=True,
122
+ )
123
+ print("[init] rig.ckpt downloaded manually.")
124
+ except Exception as e2:
125
+ print("[init] WARNING: manual checkpoint download failed:", e2)
126
+
127
+ # ----------------------------------------------------------------------------- #
128
+ # Schemas
129
+ # ----------------------------------------------------------------------------- #
130
+ class RigIn(BaseModel):
131
+ mesh_url: HttpUrl = Field(..., description="Input mesh URL (obj/glb/fbx/…)")
132
+ workdir: Optional[str] = Field(default=None, description="Optional work directory name")
133
+
134
+ class RigOut(BaseModel):
135
+ status: str
136
+ result_dir: Optional[str] = None
137
+ files_preview: Optional[List[str]] = None
138
+ detail: Optional[str] = None
139
+ gpu: Optional[bool] = None
140
+ gpu_name: Optional[str] = None
141
+
142
+ class AnimateIn(BaseModel):
143
+ video_url: HttpUrl = Field(..., description="Input video URL (mp4, mov, etc.)")
144
+ mesh_path: Optional[str] = Field(
145
+ default="/app/Puppeteer/results/rigged.glb",
146
+ description="Path to rigged mesh"
147
+ )
148
+
149
+ # ----------------------------------------------------------------------------- #
150
+ # Utils
151
+ # ----------------------------------------------------------------------------- #
152
+ def ensure_dirs() -> None:
153
+ TMP_IN_DIR.mkdir(parents=True, exist_ok=True)
154
+ PUPPETEER_RUN.mkdir(parents=True, exist_ok=True)
155
+ RESULT_DIR.mkdir(parents=True, exist_ok=True)
156
+
157
+ def prepare_run_tree() -> None:
158
+ if not PUPPETEER_SRC.exists():
159
+ raise HTTPException(status_code=500, detail=f"Puppeteer not found: {PUPPETEER_SRC}")
160
+ shutil.copytree(PUPPETEER_SRC, PUPPETEER_RUN, dirs_exist_ok=True)
161
+ script = PUPPETEER_RUN / "demo_rigging.sh"
162
+ if script.exists():
163
+ script.chmod(0o755)
164
+
165
+ def safe_basename(url: str) -> str:
166
+ name = os.path.basename(url.split("?")[0])
167
+ return SAFE_NAME.sub("_", name) or "input_mesh"
168
+
169
+ def download_with_limit(url: str, dst: Path) -> None:
170
+ with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT) as r:
171
+ r.raise_for_status()
172
+ total = 0
173
+ with open(dst, "wb") as f:
174
+ for chunk in r.iter_content(chunk_size=1024 * 1024):
175
+ if not chunk:
176
+ continue
177
+ total += len(chunk)
178
+ if total > MAX_DOWNLOAD_MB * 1024 * 1024:
179
+ raise HTTPException(status_code=413, detail="File too large")
180
+ f.write(chunk)
181
+
182
+ def torch_info() -> tuple[bool, Optional[str]]:
183
+ try:
184
+ import torch
185
+ ok = torch.cuda.is_available()
186
+ name = torch.cuda.get_device_name(0) if ok else None
187
+ return ok, name
188
+ except Exception:
189
+ return False, None
190
+
191
+ def scan_results(limit: int = 200) -> List[str]:
192
+ files: List[str] = []
193
+ exts = ("*.glb", "*.mp4", "*.fbx", "*.obj", "*.gltf", "*.png", "*.jpg", "*.json", "*.txt")
194
+ for base in RESULT_BASES:
195
+ if base.exists():
196
+ for ext in exts:
197
+ for p in base.rglob(ext):
198
+ if p.is_file():
199
+ files.append(str(p))
200
+ if len(files) >= limit:
201
+ return files
202
+ return files
203
+
204
+ # ----------------------------------------------------------------------------- #
205
+ # Routes
206
+ # ----------------------------------------------------------------------------- #
207
+ @app.get("/")
208
+ def root():
209
+ return {"status": "ready", "service": "puppeteer-api"}
210
+
211
+ @app.get("/health")
212
+ def health():
213
+ gpu, name = torch_info()
214
+ return {"status": "ok", "cuda": gpu, "gpu": name}
215
+
216
+ @app.post("/rig", response_model=RigOut)
217
+ def rig(inp: RigIn):
218
+ ensure_dirs()
219
+ prepare_run_tree()
220
+
221
+ basename = safe_basename(str(inp.mesh_url))
222
+ mesh_path = TMP_IN_DIR / basename
223
+ _ = SAFE_NAME.sub("_", inp.workdir or "job") # reserved, 현재는 미사용
224
+
225
+ try:
226
+ download_with_limit(str(inp.mesh_url), mesh_path)
227
+ except Exception as e:
228
+ raise HTTPException(status_code=400, detail=f"Download error: {e}")
229
+
230
+ script = PUPPETEER_RUN / "demo_rigging.sh"
231
+ cmd = ["bash", str(script), str(mesh_path)]
232
+ try:
233
+ proc = subprocess.run(
234
+ cmd,
235
+ cwd=str(PUPPETEER_RUN),
236
+ check=True,
237
+ stdout=subprocess.PIPE,
238
+ stderr=subprocess.STDOUT,
239
+ text=True,
240
+ )
241
+ run_log = proc.stdout[-4000:]
242
+ except subprocess.CalledProcessError as e:
243
+ snippet = (e.stdout or "")[-2000:]
244
+ raise HTTPException(status_code=500, detail=f"Puppeteer failed: {snippet}")
245
+ except FileNotFoundError:
246
+ raise HTTPException(status_code=500, detail="demo_rigging.sh not found")
247
+
248
+ preview = scan_results(limit=20)
249
+ gpu, gpu_name = torch_info()
250
+ return RigOut(
251
+ status="ok",
252
+ result_dir=str(RESULT_DIR),
253
+ files_preview=preview[:10],
254
+ detail=run_log if preview else "no result files found",
255
+ gpu=gpu,
256
+ gpu_name=gpu_name,
257
+ )
258
+
259
+ @app.post("/animate")
260
+ def animate(inp: AnimateIn):
261
+ """
262
+ Puppeteer의 demo_animation.sh 실행 (영상 기반 애니메이션)
263
+ 입력: video_url (mp4), mesh_path (rigged.glb 기본값)
264
+ """
265
+ pdir = Path("/app/Puppeteer")
266
+ script = pdir / "demo_animation.sh"
267
+ video_path = Path("/tmp/video.mp4")
268
+
269
+ if not script.exists():
270
+ raise HTTPException(status_code=404, detail="demo_animation.sh not found")
271
+
272
+ # -------- requests 기반 영상 다운로드 -------- #
273
+ try:
274
+ print(f"[animate] downloading video from {inp.video_url}")
275
+ with requests.get(str(inp.video_url), stream=True, timeout=60) as r:
276
+ r.raise_for_status()
277
+ with open(video_path, "wb") as f:
278
+ for chunk in r.iter_content(chunk_size=8192):
279
+ if chunk:
280
+ f.write(chunk)
281
+ print(f"[animate] Video saved to {video_path}")
282
+ except Exception as e:
283
+ raise HTTPException(status_code=400, detail=f"Video download failed via requests: {e}")
284
+
285
+ # -------- Puppeteer 애니메이션 실행 -------- #
286
+ cmd = [
287
+ "bash", str(script),
288
+ "--mesh", str(inp.mesh_path),
289
+ "--video", str(video_path),
290
+ ]
291
+ try:
292
+ proc = subprocess.run(
293
+ cmd,
294
+ cwd=str(pdir),
295
+ check=True,
296
+ stdout=subprocess.PIPE,
297
+ stderr=subprocess.STDOUT,
298
+ text=True,
299
+ )
300
+ output = proc.stdout[-4000:]
301
+ except subprocess.CalledProcessError as e:
302
+ raise HTTPException(status_code=500, detail=f"Animation failed: {e.stdout[-2000:]}")
303
+ except Exception as e:
304
+ raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")
305
+
306
+ anim_results = scan_results(limit=20)
307
+ return {
308
+ "status": "ok",
309
+ "video_used": str(inp.video_url),
310
+ "detail": output,
311
+ "files_preview": anim_results[:10],
312
+ }
313
+
314
+ # -------- 결과 파일 확인/다운로드 유틸 -------- #
315
+ @app.get("/list")
316
+ def list_results():
317
+ files = scan_results(limit=500)
318
+ return {"count": len(files), "files": files}
319
+
320
+ @app.get("/download")
321
+ def download(path: str):
322
+ p = Path(path).resolve()
323
+ # 안전한 경로만 허용
324
+ if not any(str(p).startswith(str(b.resolve())) for b in RESULT_BASES):
325
+ raise HTTPException(status_code=400, detail="invalid path")
326
+ if not p.exists() or not p.is_file():
327
+ raise HTTPException(status_code=404, detail="file not found")
328
+ return FileResponse(str(p), filename=p.name)
329
+
330
+ @app.get("/debug")
331
+ def debug():
332
+ pdir = Path("/app/Puppeteer")
333
+ script = pdir / "demo_rigging.sh"
334
+ ckpt_dir = pdir / "checkpoints"
335
+ req_file = pdir / "requirements.txt"
336
+ return {
337
+ "script_exists": script.exists(),
338
+ "ckpt_dir_exists": ckpt_dir.exists(),
339
+ "req_exists": req_file.exists(),
340
+ "ckpt_samples": [str(p) for p in ckpt_dir.glob("**/*")][:15],
341
+ "tmp_in": os.environ.get("TMP_IN_DIR", "/data/in"),
342
+ "result_dir": os.environ.get("RESULT_DIR", "/data/results"),
343
+ "omp_num_threads": os.environ.get("OMP_NUM_THREADS"),
344
+ }
app_backup.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, subprocess, requests
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+
5
+ app = FastAPI()
6
+
7
+ class RigIn(BaseModel):
8
+     mesh_url: str  # 입력 파일 URL (obj, glb, fbx 등)
9
+
10
+ @app.get("/")
11
+ def root():
12
+     return {"message": "Puppeteer API (GPU) ready"}
13
+
14
+ @app.get("/health")
15
+ def health():
16
+     try:
17
+         import torch
18
+         gpu = torch.cuda.is_available()
19
+         name = torch.cuda.get_device_name(0) if gpu else None
20
+         return {"status": "ok", "cuda": gpu, "gpu": name}
21
+     except Exception as e:
22
+         return {"status": "ok", "cuda": False, "detail": str(e)}
23
+
24
+ @app.post("/rig")
25
+ def rig(inp: RigIn):
26
+     os.makedirs("/tmp/in", exist_ok=True)
27
+     mesh_path = os.path.join("/tmp/in", os.path.basename(inp.mesh_url))
28
+
29
+     # 1️⃣ 입력 파일 다운로드
30
+     with requests.get(inp.mesh_url, stream=True) as r:
31
+         r.raise_for_status()
32
+         with open(mesh_path, "wb") as f:
33
+             for chunk in r.iter_content(chunk_size=8192):
34
+                 if chunk:
35
+                     f.write(chunk)
36
+
37
+     # 2️⃣ Puppeteer 실행
38
+     workdir = "/app/Puppeteer"
39
+     cmd = ["bash", "demo_rigging.sh", mesh_path]
40
+     try:
41
+         subprocess.run(cmd, cwd=workdir, check=True)
42
+     except subprocess.CalledProcessError as e:
43
+         return {"status": "error", "detail": str(e)}
44
+
45
+     # 3️⃣ 결과 목록 반환
46
+     result_dir = os.path.join(workdir, "results")
47
+     files = []
48
+     for rootdir, _, filenames in os.walk(result_dir):
49
+         for fn in filenames:
50
+             files.append(os.path.join(rootdir, fn))
51
+             if len(files) >= 20: break
52
+     return {"status": "ok", "result_dir": result_dir, "files_preview": files[:10]}
app_backup_encoding.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, subprocess, requests
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+
5
+ app = FastAPI()
6
+
7
+ class RigIn(BaseModel):
8
+ 혻 혻 mesh_url: str혻 # ?낅젰 ?뚯씪 URL (obj, glb, fbx ??
9
+
10
+ @app.get("/")
11
+ def root():
12
+ 혻 혻 return {"message": "Puppeteer API (GPU) ready"}
13
+
14
+ @app.get("/health")
15
+ def health():
16
+ 혻 혻 try:
17
+ 혻 혻 혻 혻 import torch
18
+ 혻 혻 혻 혻 gpu = torch.cuda.is_available()
19
+ 혻 혻 혻 혻 name = torch.cuda.get_device_name(0) if gpu else None
20
+ 혻 혻 혻 혻 return {"status": "ok", "cuda": gpu, "gpu": name}
21
+ 혻 혻 except Exception as e:
22
+ 혻 혻 혻 혻 return {"status": "ok", "cuda": False, "detail": str(e)}
23
+
24
+ @app.post("/rig")
25
+ def rig(inp: RigIn):
26
+ 혻 혻 os.makedirs("/tmp/in", exist_ok=True)
27
+ 혻 혻 mesh_path = os.path.join("/tmp/in", os.path.basename(inp.mesh_url))
28
+
29
+ 혻 혻 # 1截뤴깵 ?낅젰 ?뚯씪 ?ㅼ슫濡쒕뱶
30
+ 혻 혻 with requests.get(inp.mesh_url, stream=True) as r:
31
+ 혻 혻 혻 혻 r.raise_for_status()
32
+ 혻 혻 혻 혻 with open(mesh_path, "wb") as f:
33
+ 혻 혻 혻 혻 혻 혻 for chunk in r.iter_content(chunk_size=8192):
34
+ 혻 혻 혻 혻 혻 혻 혻 혻 if chunk:
35
+ 혻 혻 혻 혻 혻 혻 혻 혻 혻 혻 f.write(chunk)
36
+
37
+ 혻 혻 # 2截뤴깵 Puppeteer ?ㅽ뻾
38
+ 혻 혻 workdir = "/app/Puppeteer"
39
+ 혻 혻 cmd = ["bash", "demo_rigging.sh", mesh_path]
40
+ 혻 혻 try:
41
+ 혻 혻 혻 혻 subprocess.run(cmd, cwd=workdir, check=True)
42
+ 혻 혻 except subprocess.CalledProcessError as e:
43
+ 혻 혻 혻 혻 return {"status": "error", "detail": str(e)}
44
+
45
+ 혻 혻 # 3截뤴깵 寃곌낵 紐⑸줉 諛섑솚
46
+ 혻 혻 result_dir = os.path.join(workdir, "results")
47
+ 혻 혻 files = []
48
+ 혻 혻 for rootdir, _, filenames in os.walk(result_dir):
49
+ 혻 혻 혻 혻 for fn in filenames:
50
+ 혻 혻 혻 혻 혻 혻 files.append(os.path.join(rootdir, fn))
51
+ 혻 혻 혻 혻 혻 혻 if len(files) >= 20: break
52
+ 혻 혻 return {"status": "ok", "result_dir": result_dir, "files_preview": files[:10]}
requirements.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # server
2
+ fastapi==0.115.5
3
+ uvicorn[standard]==0.34.0
4
+ pydantic==2.9.2
5
+ requests==2.32.3
6
+
7
+ # numeric stack (pin to avoid ABI woes)
8
+ numpy<2
9
+ scipy==1.11.4
10
+
11
+ # geometry / images
12
+ trimesh==4.4.9
13
+ networkx==3.3
14
+ scikit-image==0.24.0
15
+ opencv-python-headless #
16
+ # training/runtime utils
17
+ tqdm==4.66.5
18
+ einops==0.8.0
19
+ accelerate==1.0.1
20
+ timm==1.0.9
21
+
22
+ # Hugging Face stack for skeleton step
23
+ transformers==4.44.2
24
+ tokenizers>=0.14.0
25
+ safetensors>=0.4.2
26
+ huggingface-hub>=0.23.0
27
+
samples/demo.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e66e01296a4984841baaf0b9542aed07a5d5eb84958135a8d612b9ff1ec9419
3
+ size 574823
test_puppeteer_api_v2.ps1 ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ param(
2
+ [string]$BaseUrl = "https://seungminkwak-puppeteer-api.hf.space",
3
+ [string]$Token = "",
4
+ [string]$MeshUrl = "https://cdn.jsdelivr.net/gh/KhronosGroup/glTF-Sample-Models@master/2.0/CesiumMan/glTF-Binary/CesiumMan.glb",
5
+ [string]$Workdir = "job-cesium",
6
+ [int]$MaxTries = 12,
7
+ [int]$DelaySec = 10
8
+ )
9
+
10
+ $ErrorActionPreference = "Stop"
11
+
12
+ Write-Host "=== Puppeteer API quick test ==="
13
+ Write-Host "[1] /health"
14
+ $health = Invoke-RestMethod -Uri "$BaseUrl/health" -Headers @{ Authorization = "Bearer $Token" }
15
+ $health | ConvertTo-Json -Depth 6 | Write-Host
16
+
17
+ Write-Host "[2] /rig"
18
+ $body = @{ mesh_url = $MeshUrl; workdir = $Workdir } | ConvertTo-Json -Depth 6
19
+ $resp = Invoke-RestMethod -Uri "$BaseUrl/rig" -Headers @{ Authorization = "Bearer $Token"; "Content-Type"="application/json" } -Method POST -Body $body
20
+ $resp | ConvertTo-Json -Depth 6 | Write-Host
21
+
22
+ Write-Host "[3] Poll /list"
23
+ $files = @()
24
+ for ($i=1; $i -le $MaxTries; $i++) {
25
+ try {
26
+ $list = Invoke-RestMethod -Uri "$BaseUrl/list" -Headers @{ Authorization = "Bearer $Token" }
27
+ if ($list.files_preview) {
28
+ $files = $list.files_preview
29
+ Write-Host (" -> Found: {0}" -f ($files -join ", "))
30
+ break
31
+ } else {
32
+ Write-Host (" -> Try {0}/{1}: no files yet" -f $i, $MaxTries)
33
+ }
34
+ } catch {
35
+ Write-Host (" -> Try {0}/{1}: error {2}" -f $i, $MaxTries, $_.Exception.Message)
36
+ }
37
+ Start-Sleep -Seconds $DelaySec
38
+ }
39
+
40
+ if (-not $files -or $files.Count -eq 0) {
41
+ Write-Host "No result files found." -ForegroundColor Red
42
+ exit 2
43
+ }
44
+
45
+ # choose a file
46
+ $preferred = "/data/results/rigged.glb"
47
+ $target = if ($files -contains $preferred) { $preferred } else { $files[0] }
48
+ Write-Host ("[4] Download {0}" -f $target)
49
+
50
+ $enc = [uri]::EscapeDataString($target)
51
+ $newDir = Join-Path $PWD "results"
52
+ New-Item -ItemType Directory -Path $newDir -Force | Out-Null
53
+ $out = Join-Path $newDir (Split-Path -Leaf $target)
54
+ Invoke-WebRequest -Uri "$BaseUrl/download?path=$enc" -Headers @{ Authorization = "Bearer $Token" } -OutFile $out
55
+ Write-Host ("Saved to {0}" -f $out)
56
+ try { ii $out | Out-Null } catch {}
57
+ Write-Host "=== Done ==="
third_party/Puppeteer/.gitmodules ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [submodule "animation/third_partys/ptlflow"]
2
+ path = animation/third_partys/ptlflow
3
+ url = https://github.com/ChaoyueSong/ptlflow
4
+ [submodule "animation/third_partys/co_tracker"]
5
+ path = animation/third_partys/co_tracker
6
+ url = https://github.com/ChaoyueSong/co_tracker
7
+ [submodule "animation/third_partys/Video_Depth_Anything"]
8
+ path = animation/third_partys/Video_Depth_Anything
9
+ url = https://github.com/ChaoyueSong/Video_Depth_Anything
10
+ [submodule "skinning/third_partys/PartField"]
11
+ path = skinning/third_partys/PartField
12
+ url = https://github.com/ChaoyueSong/PartField
13
+ [submodule "skeleton/third_partys/Michelangelo"]
14
+ path = skeleton/third_partys/Michelangelo
15
+ url = https://github.com/ChaoyueSong/Michelangelo/
third_party/Puppeteer/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
third_party/Puppeteer/README.md ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+
3
+ <h1>Puppeteer: Rig and Animate Your 3D Models</h1>
4
+
5
+ <p>
6
+ <a href="https://chaoyuesong.github.io"><strong>Chaoyue Song</strong></a><sup>1,2</sup>,
7
+ <a href="https://lixiulive.com/"><strong>Xiu Li</strong></a><sup>2</sup>,
8
+ <a href="https://scholar.google.com/citations?user=afDvaa8AAAAJ&hl"><strong>Fan Yang</strong></a><sup>1</sup>,
9
+ <a href="https://zcxu-eric.github.io/"><strong>Zhongcong Xu</strong></a><sup>2</sup>,
10
+ <a href="https://plusmultiply.github.io/"><strong>Jiacheng Wei</strong></a><sup>1</sup>,
11
+ <br>
12
+ <a href="https://sites.google.com/site/fayaoliu"><strong>Fayao Liu</strong></a><sup>3</sup>,
13
+ <a href="https://scholar.google.com.sg/citations?user=Q8iay0gAAAAJ"><strong>Jiashi Feng</strong></a><sup>2</sup>,
14
+ <a href="https://guosheng.github.io/"><strong>Guosheng Lin</strong></a><sup>1*</sup>,
15
+ <a href="https://jfzhang95.github.io/"><strong>Jianfeng Zhang</strong></a><sup>2*</sup>
16
+ <br>
17
+ *Corresponding authors
18
+ <br>
19
+ <sup>1 </sup>Nanyang Technological University
20
+ <sup>2 </sup>Bytedance Seed
21
+ <sup>3 </sup>A*STAR
22
+ </p>
23
+
24
+ <h3>arXiv 2025</h3>
25
+
26
+ <div align="center">
27
+ <img width="80%" src="assets/puppeteer_teaser.gif">
28
+ </div>
29
+
30
+ <p>
31
+ <a href="https://chaoyuesong.github.io/Puppeteer/"><strong>Project</strong></a> |
32
+ <a href="https://arxiv.org/abs/2508.10898"><strong>Paper</strong></a> |
33
+ <a href="https://www.youtube.com/watch?v=DnKx803JHyI"><strong>Video</strong></a> |
34
+ <a href="https://huggingface.co/datasets/chaoyue7/Articulation-XL2.0"><strong>Data: Articulation-XL2.0</strong></a>
35
+ </p>
36
+
37
+
38
+ </div>
39
+
40
+ <br/>
41
+
42
+ Puppeteer is proposed for **automatic rigging and animation of 3D objects**. Given a 3D object, Puppeteer first automatically generates skeletal structures and skinning weights, then animates the rigged model with video guidance through a differentiable optimization pipeline. This comprehensive approach aims to enable fully automated transformation of static 3D models into dynamically animated assets, eliminating the need for manual rigging expertise and significantly streamlining 3D content creation workflows.
43
+
44
+ <br/>
45
+
46
+ ## 🔥 News
47
+ - Sep 09, 2025: We uploaded the [video](https://www.youtube.com/watch?v=DnKx803JHyI) for Puppeteer.
48
+ - Sep 04, 2025: Release the inference codes and [model checkpoints](https://huggingface.co/Seed3D/Puppeteer).
49
+ - Aug 15, 2025: Release [paper](https://arxiv.org/abs/2508.10898) of Puppeteer!
50
+
51
+
52
+ ## 🔧 Installtation
53
+ We use Python 3.10 with PyTorch 2.1.1 and CUDA 11.8. The environment and required packages can be installed as follows:
54
+
55
+ ```
56
+ git clone https://github.com/ByteDance-Seed/Puppeteer.git --recursive && cd Puppeteer
57
+ conda create -n puppeteer python==3.10.13 -y
58
+ conda activate puppeteer
59
+ pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu118
60
+ pip install -r requirements.txt
61
+ pip install flash-attn==2.6.3 --no-build-isolation
62
+ pip install torch-scatter -f https://data.pyg.org/whl/torch-2.1.1+cu118.html
63
+ pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu118_pyt211/download.html
64
+ ```
65
+
66
+ ## 🚀 Demo
67
+
68
+ We provide a complete pipeline for rigging and animating 3D models. Before running the pipeline, visit each folder (skeleton, skinning, animation) to download the necessary model checkpoints. Example data is available in the [examples](https://github.com/ByteDance-Seed/Puppeteer/tree/main/examples) folder.
69
+
70
+ ### Rigging
71
+
72
+ Given 3D meshes, we first predict the skeleton and skinning weights:
73
+
74
+ ```
75
+ bash demo_rigging.sh
76
+ ```
77
+
78
+ The final rig files will be saved in `results/final_rigging`. To evaluate the [skeleton](https://github.com/ByteDance-Seed/Puppeteer/tree/main/skeleton) and [skinning](https://github.com/ByteDance-Seed/Puppeteer/tree/main/skinning) components separately, refer to their respective folders.
79
+
80
+ ### Video-guided 3D animation
81
+
82
+ To animate the rigged model using video guidance, run:
83
+
84
+ ```
85
+ bash demo_animation.sh
86
+ ```
87
+
88
+ The rendered 3D animation sequence from different views will be saved in `results/animation`. Refer to the [animation folder](https://github.com/ByteDance-Seed/Puppeteer/tree/main/animation) for comprehensive details on data processing and structure.
89
+
90
+
91
+ ## 😊 Acknowledgment
92
+
93
+ The code builds upon [MagicArticulate](https://github.com/Seed3D/MagicArticulate), [MeshAnything](https://github.com/buaacyw/MeshAnything), [Functional Diffusion](https://1zb.github.io/functional-diffusion/), [RigNet](https://github.com/zhan-xu/RigNet), [Michelangelo](https://github.com/NeuralCarver/Michelangelo/), [PartField](https://github.com/nv-tlabs/PartField), [AnyMole](https://github.com/kwanyun/AnyMoLe) and [Lab4D](https://github.com/lab4d-org/lab4d). We gratefully acknowledge the authors for making their work publicly available.
94
+
95
+
96
+ ## 📚 Citation
97
+
98
+ ```
99
+ @article{song2025puppeteer,
100
+ title={Puppeteer: Rig and Animate Your 3D Models},
101
+ author={Chaoyue Song and Xiu Li and Fan Yang and Zhongcong Xu and Jiacheng Wei and Fayao Liu and Jiashi Feng and Guosheng Lin and Jianfeng Zhang},
102
+ journal={arXiv preprint arXiv:2508.10898},
103
+ year={2025}
104
+ }
105
+ ```
third_party/Puppeteer/animation/README.md ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 3D Animation with Video Guidance
2
+ This repository provides a complete pipeline for generating 3D object animations with video guidance. The system includes data processing and optimization algorithms for rigging-based animation.
3
+
4
+ ## Overview
5
+ The pipeline takes a rigged 3D model and a reference video, then optimizes the object's motion to match the video guidance while maintaining realistic skeletal constraints.
6
+
7
+ ## Prerequisites
8
+
9
+ ### Model Downloads
10
+ Download the required pre-trained models:
11
+
12
+ - [Video-Depth-Anything](https://huggingface.co/depth-anything/Video-Depth-Anything-Large) - For depth estimation
13
+ - [CoTracker3](https://huggingface.co/facebook/cotracker3) - For point tracking
14
+
15
+ ```
16
+ python download.py
17
+ ```
18
+
19
+ ### Input Data Structure
20
+
21
+ Organize your input data as follows:
22
+ ```
23
+ inputs/
24
+ └── {seq_name}/
25
+ ├── objs/
26
+ │ ├── mesh.obj # 3D mesh geometry
27
+ │ ├── rig.txt # Rigging definition
28
+ │ ├── material.mtl # Material properties (optional)
29
+ │ └── texture.png # Texture maps (optional)
30
+ ├── first_frames/ # Rendered initial frames
31
+ ├── imgs/ # Extracted video frames
32
+ ├── flow/ # Optical flow data
33
+ ├── flow_vis/ # Visualized optical flow
34
+ ├── depth/ # Esitmated depth data
35
+ ├── track/ # tracked joints/vertices
36
+ └── input.mp4 # Source video
37
+ ```
38
+
39
+ ## Data Processing
40
+
41
+ Given a 3D model with rigging under `inputs/{seq_name}/objs` (`mesh.obj, rig.txt`, optional `.mtl` and texture `.png`), we first render the object from a specified viewpoint. This image is used as the input (first frame) to the video generation model (e.g., [Jimeng AI](https://jimeng.jianying.com/ai-tool/home?type=video)).
42
+
43
+ ```
44
+ python utils/render_first_frame.py --input_path inputs --seq_name {seq_name}
45
+ ```
46
+ Replace `{seq_name}` with your sequence name. The first-frame images are saved to `inputs/{seq_name}/first_frames`. This generates reference images from 4 different viewpoints (you can add more). Choose the viewpoint that best shows the object's joints and key parts for optimal animation results. Save the generated videos to `inputs/{seq_name}/input.mp4`.
47
+
48
+ Then we extract the frames from the video by running:
49
+
50
+ ```
51
+ cd inputs/{seq_name}; mkdir imgs
52
+ ffmpeg -i input.mp4 -vf fps=10 imgs/frame_%04d.png
53
+ cd ../../
54
+ ```
55
+
56
+ Estimate optical flows by running:
57
+
58
+ ```
59
+ python utils/save_flow.py --input_path inputs --seq_name {seq_name}
60
+ ```
61
+ The flow `.flo` files are saved to `inputs/{seq_name}/flow`, the flow visualization are saved to `inputs/{seq_name}/flow_vis`. Depth and tracking information are saved during optimization.
62
+
63
+ ## Optimization
64
+
65
+ To optimize the animation, you can run
66
+
67
+ ```
68
+ bash demo.sh
69
+ ```
70
+
71
+ The results are saved to `results/{seq_name}/{save_name}`. Modify `--main_renderer` and `--additional_renderers` to change rendering viewpoints. If animations exhibit jitter or instability, increase the root/joint smoothing weights for better temporal consistency.
72
+
73
+
74
+ ## TODO
75
+
76
+ - [ ] Add multi-view supervisions.
third_party/Puppeteer/animation/demo.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python optimization.py --save_path results --iter 200 --input_path inputs --img_size 960 \
2
+ --seq_name 'fish' --save_name 'fish' --coherence_weight 5
3
+
4
+ # python optimization.py --save_path results --iter 200 --input_path inputs --img_size 960 \
5
+ # --seq_name 'crocodile' --save_name 'crocodile_demo' --coherence_weight 15
6
+
7
+
third_party/Puppeteer/animation/download.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import hf_hub_download
2
+
3
+ file_path = hf_hub_download(
4
+ repo_id="facebook/cotracker3",
5
+ filename="scaled_offline.pth",
6
+ local_dir="third_partys/co_tracker/ckpt"
7
+ )
8
+
9
+ file_path = hf_hub_download(
10
+ repo_id="depth-anything/Video-Depth-Anything-Large",
11
+ filename="video_depth_anything_vitl.pth",
12
+ local_dir="third_partys/Video_Depth_Anything/ckpt"
13
+ )
third_party/Puppeteer/animation/model.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import torch
16
+ from typing import List, Optional, Tuple, Union
17
+ from collections import deque
18
+ from pytorch3d.structures import Meshes, join_meshes_as_scene
19
+ from pytorch3d.renderer import TexturesVertex, TexturesUV
20
+
21
+ from utils.quat_utils import quat_to_transform_matrix, quat_multiply, quat_rotate_vector
22
+
23
+ class RiggingModel:
24
+ """
25
+ A 3D rigged model supporting skeletal animation.
26
+
27
+ Handles mesh geometry, skeletal hierarchy, skinning weights, and
28
+ linear blend skinning (LBS) deformation.
29
+ """
30
+ def __init__(self, device = "cuda:0"):
31
+ self.device = device
32
+ # Mesh data
33
+ self.vertices: List[torch.Tensor] = []
34
+ self.faces: List[torch.Tensor] = []
35
+ self.textures: List[Union[TexturesVertex, TexturesUV]] = []
36
+
37
+ # Skeletal data
38
+ self.bones: Optional[torch.Tensor] = None # (N, 2) [parent, child] pairs
39
+ self.parent_indices: Optional[torch.Tensor] = None # (J,) parent index for each joint
40
+ self.root_index: Optional[int] = None # Root joint index
41
+ self.joints_rest: Optional[torch.Tensor] = None # (J, 3) rest pose positions
42
+ self.skin_weights: List[torch.Tensor] = [] # List of (V_i, J) skinning weights
43
+
44
+ # Fixed local positions
45
+ self.rest_local_positions: Optional[torch.Tensor] = None # (J, 3)
46
+
47
+ # Computed data
48
+ self.bind_matrices_inv: Optional[torch.Tensor] = None # (J, 4, 4) inverse bind matrices
49
+ self.deformed_vertices: Optional[List[torch.Tensor]] = None # List of (T, V_i, 3)
50
+ self.joint_positions: Optional[torch.Tensor] = None # (T, J, 3) current joint positions
51
+
52
+ # Validation flags
53
+ self._bind_matrices_initialized = False
54
+
55
+ def initialize_bind_matrices(self, rest_local_pos):
56
+ """Initialize bind matrices and store rest local positions."""
57
+ self.rest_local_positions = rest_local_pos.to(self.device)
58
+
59
+ J = rest_local_pos.shape[0]
60
+ rest_global_quats, rest_global_pos = self.forward_kinematics(
61
+ torch.tensor([[[1.0, 0.0, 0.0, 0.0]] * J], device=self.device), # unit quaternion
62
+ self.parent_indices,
63
+ self.root_index
64
+ )
65
+
66
+ bind_matrices = quat_to_transform_matrix(rest_global_quats, rest_global_pos) # (1,J,4,4)
67
+ self.bind_matrices_inv = torch.inverse(bind_matrices.squeeze(0)) # (J,4,4)
68
+
69
+ self._bind_matrices_initialized = True
70
+
71
+ def animate(self, local_quaternions, root_quaternion = None, root_position = None):
72
+ """
73
+ Animate the model using local joint transformations.
74
+
75
+ Args:
76
+ local_quaternions: (T, J, 4) local rotations per frame
77
+ root_quaternion: (T, 4) global root rotation
78
+ root_position: (T, 3) global root translation
79
+ """
80
+ if not self._bind_matrices_initialized:
81
+ raise RuntimeError("Bind matrices not initialized. Call initialize_bind_matrices() first.")
82
+
83
+ # Forward kinematics
84
+ global_quats, global_pos = self.forward_kinematics(
85
+ local_quaternions,
86
+ self.parent_indices,
87
+ self.root_index
88
+ )
89
+ self.joint_positions = global_pos
90
+
91
+ joint_transforms = quat_to_transform_matrix(global_quats, global_pos) # (T, J, 4, 4)
92
+
93
+ # Apply global root transformation if provided
94
+ if root_quaternion is not None and root_position is not None:
95
+ root_transform = quat_to_transform_matrix(root_quaternion, root_position)
96
+ joint_transforms = root_transform[:, None] @ joint_transforms
97
+ self.joint_positions = joint_transforms[..., :3, 3]
98
+
99
+ # Linear blend skinning
100
+ self.deformed_vertices = []
101
+ for i, vertices in enumerate(self.vertices):
102
+ deformed = self._linear_blend_skinning(
103
+ vertices,
104
+ joint_transforms,
105
+ self.skin_weights[i],
106
+ self.bind_matrices_inv
107
+ )
108
+ self.deformed_vertices.append(deformed)
109
+
110
+
111
+ def get_mesh(self, frame_idx=None):
112
+ meshes = []
113
+ for i in range(len(self.vertices)):
114
+ mesh = Meshes(
115
+ verts=[self.vertices[i]] if frame_idx is None or self.deformed_vertices is None else [self.deformed_vertices[i][frame_idx]],
116
+ faces=[self.faces[i]],
117
+ textures=self.textures[i]
118
+ )
119
+ meshes.append(mesh)
120
+ return join_meshes_as_scene(meshes)
121
+
122
+ def _linear_blend_skinning(self, vertices, joint_transforms, skin_weights, bind_matrices_inv):
123
+ """
124
+ Apply linear blend skinning to vertices.
125
+
126
+ Args:
127
+ vertices: (V, 3) vertex positions
128
+ joint_transforms: (T, J, 4, 4) joint transformation matrices
129
+ skin_weights: (V, J) per-vertex joint weights
130
+ bind_matrices_inv: (J, 4, 4) inverse bind matrices
131
+
132
+ Returns:
133
+ (T, V, 3) deformed vertices
134
+ """
135
+ # Compute final transformation matrices
136
+ transforms = torch.matmul(joint_transforms, bind_matrices_inv) # (T, J, 4, 4)
137
+
138
+ # Weight and blend transformations
139
+ weighted_transforms = torch.einsum('vj,tjab->tvab', skin_weights, transforms) # (T, V, 4, 4)
140
+
141
+ # Apply to vertices
142
+ vertices_hom = torch.cat([vertices, torch.ones(vertices.shape[0], 1, device=vertices.device)], dim=-1)
143
+ deformed = torch.matmul(weighted_transforms, vertices_hom.unsqueeze(-1)).squeeze(-1)
144
+
145
+ return deformed[..., :3]
146
+
147
+ def forward_kinematics(self, local_quaternions, parent_indices, root_index = 0):
148
+ """
149
+ Compute global joint transformations from local ones.
150
+
151
+ Args:
152
+ local_quaternions: (B, J, 4) local rotations
153
+ parent_indices: (J,) parent index for each joint
154
+ root_index: Root joint index
155
+
156
+ Returns:
157
+ Tuple of (global_quaternions, global_positions)
158
+ """
159
+ B, J = local_quaternions.shape[:2]
160
+ local_positions = self.rest_local_positions.unsqueeze(0).expand(B, -1, -1)
161
+
162
+
163
+ # Initialize storage
164
+ global_quats = [None] * J
165
+ global_positions = [None] * J
166
+
167
+ # Build children mapping
168
+ children = [[] for _ in range(J)]
169
+ for child_idx in range(J):
170
+ parent_idx = parent_indices[child_idx]
171
+ if parent_idx >= 0:
172
+ children[parent_idx].append(child_idx)
173
+
174
+ # Breadth-first traversal from root
175
+ queue = deque([root_index])
176
+ visited = {root_index}
177
+
178
+ # Process root
179
+ global_quats[root_index] = local_quaternions[:, root_index]
180
+ global_positions[root_index] = local_positions[:, root_index]
181
+
182
+ while queue:
183
+ current = queue.popleft()
184
+ current_quat = global_quats[current]
185
+ current_pos = global_positions[current]
186
+
187
+ for child in children[current]:
188
+ if child not in visited:
189
+ visited.add(child)
190
+ queue.append(child)
191
+
192
+ # Transform child to global space
193
+ child_quat = quat_multiply(current_quat, local_quaternions[:, child])
194
+ child_pos = quat_rotate_vector(current_quat, local_positions[:, child]) + current_pos
195
+
196
+ global_quats[child] = child_quat
197
+ global_positions[child] = child_pos
198
+
199
+ return torch.stack(global_quats, dim=1), torch.stack(global_positions, dim=1)
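A minimal usage sketch for the RiggingModel above (illustrative, not part of this commit): load a rigged mesh with the loader added later in this commit, build bind matrices from rest-pose local offsets, animate with identity local rotations, and fetch one frame of geometry. Paths and the frame count are placeholders.

import torch

from utils.data_loader import load_model_from_obj_and_rig
from utils.quat_utils import compute_rest_local_positions

device = "cuda:0"
model = load_model_from_obj_and_rig("objs/mesh.obj", "objs/rig.txt", device=device)

# Bind matrices are derived from the rest-pose local joint offsets.
rest_local = compute_rest_local_positions(model.joints_rest, model.parent_indices)
model.initialize_bind_matrices(rest_local)

# Identity (w, x, y, z) quaternions for T frames keep the mesh in rest pose.
T, J = 4, model.joints_rest.shape[0]
local_quats = torch.zeros(T, J, 4, device=device)
local_quats[..., 0] = 1.0
model.animate(local_quats)

frame0 = model.get_mesh(frame_idx=0)  # joined pytorch3d Meshes for frame 0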
third_party/Puppeteer/animation/optimization.py ADDED
@@ -0,0 +1,626 @@
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import os
17
+ import argparse
18
+ import json
19
+ import numpy as np
20
+ import logging
21
+ import glob
22
+ import torch
23
+ import torch.nn.functional as F
24
+ from PIL import Image
25
+ from tqdm import tqdm
26
+
27
+ from renderer import MeshRenderer3D
28
+ from model import RiggingModel
29
+ from utils.quat_utils import (
30
+ compute_rest_local_positions, quat_inverse, quat_log, quat_multiply
31
+ )
32
+ from utils.loss_utils import (
33
+ DepthModule, compute_reprojection_loss, geodesic_loss, root_motion_reg,
34
+ calculate_flow_loss, compute_depth_loss_normalized, joint_motion_coherence
35
+ )
36
+ from utils.data_loader import load_model_from_obj_and_rig, prepare_depth
37
+ from utils.save_utils import (
38
+ save_args, visualize_joints_on_mesh, save_final_video,
39
+ save_and_smooth_results, visualize_points_on_mesh, save_track_points
40
+ )
41
+ from utils.misc import warmup_then_decay
42
+ from third_partys.co_tracker.save_track import save_track
43
+
44
+ class AnimationOptimizer:
45
+ """Main class for animation optimization with video guidance."""
46
+
47
+ def __init__(self, args, device = 'cuda:0'):
48
+ self.args = args
49
+ self.device = device
50
+ self.logger = self._setup_logger()
51
+
52
+ # Training parameters
53
+ self.reinit_patience_threshold = 20
54
+ self.loss_divergence_factor = 2.0
55
+ self.gradient_clip_norm = 1.0
56
+
57
+ # Loss weights
58
+ self.target_ratios = {
59
+ 'rgb': args.rgb_wt,
60
+ 'flow': args.flow_wt,
61
+ 'proj_joint': args.proj_joint_wt,
62
+ 'proj_vert': args.proj_vert_wt,
63
+ 'depth': args.depth_wt,
64
+ 'mask': args.mask_wt
65
+ }
66
+ self.loss_weights = {
67
+ 'rgb': 1.0,
68
+ 'flow': 1.0,
69
+ 'proj_joint': 1.0,
70
+ 'proj_vert': 1.0,
71
+ 'depth': 1.0,
72
+ 'mask': 1.0
73
+ }
74
+
75
+ def _setup_logger(self):
76
+ """Set up logging configuration."""
77
+ logger = logging.getLogger("animation_optimizer")
78
+ logger.setLevel(logging.INFO)
79
+
80
+ if not logger.handlers:
81
+ formatter = logging.Formatter(
82
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
83
+ )
84
+ console_handler = logging.StreamHandler()
85
+ console_handler.setFormatter(formatter)
86
+ logger.addHandler(console_handler)
87
+
88
+ return logger
89
+ def _add_file_handler(self, log_path):
90
+ """Add file handler to logger."""
91
+ file_handler = logging.FileHandler(log_path)
92
+ formatter = logging.Formatter("%(asctime)s %(message)s")
93
+ file_handler.setFormatter(formatter)
94
+ self.logger.addHandler(file_handler)
95
+
96
+ def _initialize_parameters(self, batch_size, num_joints):
97
+ """Initialize optimization parameters."""
98
+
99
+ # Fixed first frame quaternions (identity)
100
+ fixed_quat_0 = torch.zeros((1, num_joints, 4), device=self.device)
101
+ fixed_quat_0[..., 0] = 1.0
102
+
103
+ # Initialize learnable quaternions for frames 1 to B-1
104
+ learn_quats_init = torch.zeros((batch_size - 1, num_joints, 4), device=self.device)
105
+ learn_quats_init[..., 0] = 1.0
106
+ quats_to_optimize = learn_quats_init.clone().detach().requires_grad_(True)
107
+
108
+ # Initialize global transformations
109
+ fixed_global_quat_0 = torch.zeros((1, 4), device=self.device)
110
+ fixed_global_quat_0[:, 0] = 1.0
111
+ fixed_global_trans_0 = torch.zeros((1, 3), device=self.device)
112
+
113
+ # Initialize learnable global transformations
114
+ global_quats_init = torch.zeros((batch_size - 1, 4), device=self.device)
115
+ global_quats_init[:, 0] = 1.0
116
+ global_trans_init = torch.zeros((batch_size - 1, 3), device=self.device)
117
+
118
+ global_quats = global_quats_init.clone().detach().requires_grad_(True)
119
+ global_trans = global_trans_init.clone().detach().requires_grad_(True)
120
+
121
+ return quats_to_optimize, global_quats, global_trans, fixed_quat_0, fixed_global_quat_0, fixed_global_trans_0
122
+
123
+ def _setup_optimizer_and_scheduler(self, quats_to_optimize, global_quats, global_trans, n_iters):
124
+ """Set up optimizer and learning rate scheduler."""
125
+
126
+ base_lr = self.args.warm_lr
127
+ max_lr = self.args.lr
128
+ warmup_steps = 20
129
+
130
+ min_lr = self.args.min_lr
131
+ quat_lr = base_lr # *2
132
+
133
+ optimizer = torch.optim.AdamW([
134
+ {'params': quats_to_optimize, 'lr': quat_lr},
135
+ {'params': global_quats, 'lr': quat_lr},
136
+ {'params': global_trans, 'lr': base_lr}
137
+ ])
138
+
139
+ scheduler = warmup_then_decay(
140
+ optimizer=optimizer,
141
+ total_steps=n_iters,
142
+ warmup_steps=warmup_steps,
143
+ max_lr=max_lr,
144
+ min_lr=min_lr,
145
+ base_lr=base_lr
146
+ )
147
+
148
+ return optimizer, scheduler
149
+
150
+ def _compute_smoothness_losses(self, quats_normed, all_global_quats_normed, all_global_trans, model):
151
+ """Compute various smoothness losses."""
152
+
153
+ # Rotation smoothness loss using geodesic distance
154
+ theta = geodesic_loss(quats_normed[1:], quats_normed[:-1])
155
+ rot_smoothness_loss = (theta ** 2).mean()
156
+
157
+ # Second-order rotation smoothness (acceleration)
158
+ omega = quat_log(quat_multiply(quat_inverse(quats_normed[:-1]), quats_normed[1:]))
159
+ rot_acc = omega[1:] - omega[:-1]
160
+ rot_acc_smoothness_loss = rot_acc.pow(2).mean()
161
+
162
+ # Joint motion coherence loss (parent-child relative motion smoothness)
163
+ joint_coherence_loss = joint_motion_coherence(quats_normed, model.parent_indices)
164
+
165
+ # Root motion regularization
166
+ root_pos_smooth_loss, root_quat_smooth_loss = root_motion_reg(
167
+ all_global_quats_normed, all_global_trans
168
+ )
169
+
170
+ return rot_smoothness_loss, rot_acc_smoothness_loss, joint_coherence_loss, root_pos_smooth_loss + root_quat_smooth_loss
171
+
172
+ def pre_calibrate_loss_weights(self, loss_components, target_ratios=None):
173
+ """ calibrate loss weights """
174
+ loss_for_ratio = {name: loss.detach().clone() for name, loss in loss_components.items()}
175
+
176
+ rgb_loss = loss_for_ratio['rgb'].item()
177
+
178
+ for name, loss_val in loss_for_ratio.items():
179
+ if name == 'rgb':
180
+ continue
181
+
182
+ if loss_val > 1e-8:
183
+ scale_factor = rgb_loss / loss_val.item()
184
+ target_ratio = target_ratios.get(name, 1.0)
185
+ new_weight = self.loss_weights.get(name, 1.0) * scale_factor * target_ratio
186
+
187
+ self.loss_weights[name] = new_weight
188
+
189
+ def _compute_losses(
190
+ self,
191
+ model,
192
+ renderer,
193
+ images_batch,
194
+ tracked_joints_2d,
195
+ joint_vis_mask,
196
+ track_verts_2d,
197
+ vert_vis_mask,
198
+ sampled_vertex_indices,
199
+ track_indices,
200
+ flow_dirs,
201
+ depth_gt_raw,
202
+ mask,
203
+ out_dir,
204
+ iteration
205
+ ):
206
+ """Compute all losses for the optimization."""
207
+
208
+ batch_size = images_batch.shape[0]
209
+ meshes = [model.get_mesh(t) for t in range(batch_size)]
210
+ pred_images_all = renderer.render_batch(meshes)
211
+
212
+ # 2D projection losses
213
+ pred_joints_3d = model.joint_positions
214
+ proj_joint_loss = compute_reprojection_loss(
215
+ renderer, joint_vis_mask, pred_joints_3d,
216
+ tracked_joints_2d, self.args.img_size
217
+ )
218
+
219
+ pred_points_3d = model.deformed_vertices[0]
220
+ proj_vert_loss = compute_reprojection_loss(
221
+ renderer, vert_vis_mask,
222
+ pred_points_3d[:, sampled_vertex_indices],
223
+ track_verts_2d[:, track_indices],
224
+ self.args.img_size
225
+ )
226
+
227
+ # RGB loss
228
+ pred_rgb = pred_images_all[..., :3]
229
+ real_rgb = images_batch[..., :3]
230
+ diff_rgb_masked = (pred_rgb - real_rgb) * mask.unsqueeze(-1)
231
+
232
+ mse_rgb_num = (diff_rgb_masked ** 2).sum()
233
+ mse_rgb_den = mask.sum() * 3
234
+ rgb_loss = mse_rgb_num / mse_rgb_den.clamp_min(1e-8)
235
+
236
+ # Mask loss
237
+ silhouette_soft = renderer.render_silhouette_batch(meshes).squeeze()
238
+ mask_loss = F.binary_cross_entropy(silhouette_soft, mask)
239
+
240
+ # Depth losses
241
+ fragments = renderer.get_rasterization_fragments(meshes)
242
+ zbuf_depths = fragments.zbuf[..., 0]
243
+ depth_loss = compute_depth_loss_normalized(depth_gt_raw, zbuf_depths, mask)
244
+
245
+ # Flow losses
246
+ flow_loss = calculate_flow_loss(flow_dirs, self.device, mask, renderer, model)
247
+
248
+ loss_components = {
249
+ 'rgb': rgb_loss,
250
+ 'proj_joint': proj_joint_loss,
251
+ 'proj_vert': proj_vert_loss,
252
+ 'depth': depth_loss,
253
+ 'flow': flow_loss,
254
+ 'mask': mask_loss
255
+ }
256
+
257
+ return loss_components
258
+
259
+ def optimization(
260
+ self,
261
+ images_batch,
262
+ model,
263
+ renderer,
264
+ tracked_joints_2d,
265
+ joint_vis_mask,
266
+ track_verts_2d,
267
+ vert_vis_mask,
268
+ sampled_vertex_indices,
269
+ track_indices,
270
+ flow_dirs,
271
+ n_iters,
272
+ out_dir):
273
+ """
274
+ Optimize animation parameters with fixed first frame.
275
+ """
276
+ torch.autograd.set_detect_anomaly(True)
277
+
278
+ batch_size, _, _, _ = images_batch.shape
279
+ num_joints = model.joints_rest.shape[0]
280
+
281
+ # Setup output directory and logging
282
+ os.makedirs(out_dir, exist_ok=True)
283
+ log_path = os.path.join(out_dir, "optimization.log")
284
+ self._add_file_handler(log_path)
285
+
286
+ # Initialize parameters
287
+ (quats_to_optimize, global_quats, global_trans,
288
+ fixed_quat_0, fixed_global_quat_0, fixed_global_trans_0) = self._initialize_parameters(batch_size, num_joints)
289
+
290
+ # Setup rest positions and bind matrices
291
+ rest_local_pos = compute_rest_local_positions(model.joints_rest, model.parent_indices)
292
+ model.initialize_bind_matrices(rest_local_pos)
293
+
294
+ # Setup optimizer and scheduler
295
+ optimizer, scheduler = self._setup_optimizer_and_scheduler(
296
+ quats_to_optimize, global_quats, global_trans, n_iters
297
+ )
298
+
299
+ # Initialize depth module and flow weights
300
+ depth_module = DepthModule(
301
+ encoder='vitl',
302
+ device=self.device,
303
+ input_size=images_batch.shape[1],
304
+ fp32=False
305
+ )
306
+
307
+ # Prepare masks
308
+ real_rgb = images_batch[..., :3]
309
+ threshold = 0.95
310
+ with torch.no_grad():
311
+ background_mask = (real_rgb > threshold).all(dim=-1)
312
+ mask = (~background_mask).float()
313
+
314
+ depth_gt_raw = prepare_depth(
315
+ flow_dirs.replace('flow', 'depth'), real_rgb, self.device, depth_module
316
+ )
317
+
318
+ # Optimization tracking
319
+ best_loss = float('inf')
320
+ patience = 0
321
+ best_params = None
322
+
323
+ pbar = tqdm(total=n_iters, desc="Optimizing animation")
324
+
325
+ for iteration in range(n_iters):
326
+ # Combine fixed and learnable parameters
327
+ quats_all = torch.cat([fixed_quat_0, quats_to_optimize], dim=0)
328
+
329
+ # Normalize quaternions
330
+ reshaped = quats_all.reshape(-1, 4)
331
+ norm = torch.norm(reshaped, dim=1, keepdim=True).clamp_min(1e-8)
332
+ quats_normed = (reshaped / norm).reshape(batch_size, num_joints, 4)
333
+
334
+ # Global transformations
335
+ all_global_quats = torch.cat([fixed_global_quat_0, global_quats], dim=0)
336
+ all_global_trans = torch.cat([fixed_global_trans_0, global_trans], dim=0)
337
+ all_global_quats_normed = all_global_quats / torch.norm(
338
+ all_global_quats, dim=-1, keepdim=True
339
+ ).clamp_min(1e-8)
340
+
341
+ # Compute smoothness losses
342
+ (rot_smoothness_loss, rot_acc_smoothness_loss, joint_coherence_loss,
343
+ root_smooth_loss) = self._compute_smoothness_losses(
344
+ quats_normed, all_global_quats_normed, all_global_trans, model
345
+ )
346
+
347
+ # animate model
348
+ model.animate(quats_normed, all_global_quats_normed, all_global_trans)
349
+
350
+ # Verify first frame hasn't changed
351
+ verts0 = model.vertices[0]
352
+ de0 = model.deformed_vertices[0][0]
353
+ assert torch.allclose(de0, verts0, atol=1e-2), "First frame vertices have changed!"
354
+
355
+ # Compute all losses
356
+ loss_components = self._compute_losses(
357
+ model, renderer, images_batch, tracked_joints_2d, joint_vis_mask,
358
+ track_verts_2d, vert_vis_mask, sampled_vertex_indices, track_indices,
359
+ flow_dirs, depth_gt_raw, mask, out_dir, iteration
360
+ )
361
+
362
+ total_smoothness_loss = rot_smoothness_loss + rot_acc_smoothness_loss * 10
363
+
364
+ if iteration == 0:
365
+ self.pre_calibrate_loss_weights(loss_components, self.target_ratios)
366
+
367
+ total_loss = (
368
+ loss_components['rgb'] +
369
+ self.loss_weights['mask'] * loss_components['mask'] +
370
+ self.loss_weights['flow'] * loss_components['flow'] +
371
+ self.loss_weights['proj_joint'] * loss_components['proj_joint'] +
372
+ self.loss_weights['proj_vert'] * loss_components['proj_vert'] +
373
+ self.loss_weights['depth'] * loss_components['depth'] +
374
+ self.args.smooth_weight * total_smoothness_loss +
375
+ self.args.coherence_weight * joint_coherence_loss +
376
+ self.args.root_smooth_weight * root_smooth_loss
377
+ )
378
+
379
+ # Optimization step
380
+ optimizer.zero_grad()
381
+ total_loss.backward()
382
+ torch.nn.utils.clip_grad_norm_(
383
+ [quats_to_optimize, global_quats, global_trans],
384
+ max_norm=self.gradient_clip_norm
385
+ )
386
+ optimizer.step()
387
+ scheduler.step()
388
+
389
+ # Update progress bar and logging
390
+ loss_desc = (
391
+ f"Loss: {total_loss.item():.4f}, "
392
+ f"RGB: {loss_components['rgb'].item():.4f}, "
393
+ f"Mask: {self.loss_weights['mask'] * loss_components['mask'].item():.4f}, "
394
+ f"Flow: {self.loss_weights['flow'] * loss_components['flow'].item():.4f}, "
395
+ f"Proj_joint: {self.loss_weights['proj_joint'] * loss_components['proj_joint'].item():.4f}, "
396
+ f"Proj_vert: {self.loss_weights['proj_vert'] * loss_components['proj_vert'].item():.4f}, "
397
+ f"Depth: {self.loss_weights['depth'] * loss_components['depth'].item():.4f}, "
398
+ f"Smooth: {self.args.smooth_weight * total_smoothness_loss.item():.4f}, "
399
+ f"Joint smooth: {self.args.coherence_weight * joint_coherence_loss.item():.4f}, "
400
+ f"Root smooth: {self.args.root_smooth_weight * root_smooth_loss.item():.4f}"
401
+ )
402
+ pbar.set_description(loss_desc)
403
+
404
+ if iteration % 5 == 0:
405
+ self.logger.info(f"Iter {iteration}: {loss_desc}")
406
+
407
+ # Adaptive reinitialization
408
+ current_loss = total_loss.item()
409
+ if current_loss < best_loss:
410
+ best_loss = current_loss
411
+ best_params = {
412
+ 'quats': quats_to_optimize.clone().detach(),
413
+ 'global_quats': global_quats.clone().detach(),
414
+ 'global_trans': global_trans.clone().detach()
415
+ }
416
+ patience = 0
417
+ elif (current_loss > best_loss * self.loss_divergence_factor or
418
+ patience > self.reinit_patience_threshold * 2):
419
+ # Reinitialize with best parameters
420
+ quats_to_optimize = best_params['quats'].clone().requires_grad_(True)
421
+ global_quats = best_params['global_quats'].clone().requires_grad_(True)
422
+ global_trans = best_params['global_trans'].clone().requires_grad_(True)
423
+
424
+ optimizer, scheduler = self._setup_optimizer_and_scheduler(
425
+ quats_to_optimize, global_quats, global_trans, n_iters
426
+ )
427
+ patience = 0
428
+ self.logger.info(f'Adaptive reset at iteration {iteration} with best loss: {best_loss:.6f}')
429
+ else:
430
+ patience += 1
431
+
432
+ pbar.update(1)
433
+
434
+ pbar.close()
435
+
436
+ # Prepare final results
437
+ quats_final = torch.cat([fixed_quat_0, best_params['quats']], dim=0)
438
+
439
+ # Final normalization
440
+ reshaped = quats_final.reshape(-1, 4)
441
+ norm = torch.norm(reshaped, dim=1, keepdim=True).clamp_min(1e-8)
442
+ quats_final = (reshaped / norm).reshape(batch_size, num_joints, 4)
443
+
444
+ global_quats_final = torch.cat([fixed_global_quat_0, best_params['global_quats']], dim=0)
445
+ global_trans_final = torch.cat([fixed_global_trans_0, best_params['global_trans']], dim=0)
446
+ global_quats_final = global_quats_final / torch.norm(
447
+ global_quats_final, dim=-1, keepdim=True
448
+ ).clamp_min(1e-8)
449
+
450
+ return quats_final, global_quats_final, global_trans_final
451
+
452
+ def load_and_prepare_data(args):
453
+ """Load and prepare all necessary data for optimization."""
454
+
455
+ # Define paths
456
+ base_path = f'{args.input_path}/{args.seq_name}'
457
+ mesh_path = f'{base_path}/objs/mesh.obj'
458
+ rig_path = f'{base_path}/objs/rig.txt'
459
+ img_path = f'{base_path}/imgs'
460
+ flow_dirs = f'{base_path}/flow'
461
+
462
+ # Load model
463
+ model = load_model_from_obj_and_rig(mesh_path, rig_path, device=args.device)
464
+
465
+ # Load images
466
+ img_files = sorted(glob.glob(os.path.join(img_path, "*.png")))
467
+ images = []
468
+ for f in img_files:
469
+ img = Image.open(f).convert("RGBA")
470
+ arr = np.array(img, dtype=np.float32) / 255.0
471
+ t = torch.from_numpy(arr).to(args.device)
472
+ images.append(t)
473
+
474
+ images_batch = torch.stack(images, dim=0)
475
+
476
+ return model, images_batch, flow_dirs, img_path
477
+
478
+ def setup_renderers(args):
479
+ """Setup multiple renderers for different camera views."""
480
+
481
+ available_views = [
482
+ "front", "back", "left", "right",
483
+ "front_left", "front_right", "back_left", "back_right"
484
+ ]
485
+
486
+ if args.main_renderer not in available_views:
487
+ raise ValueError(f"Main renderer '{args.main_renderer}' not found in available cameras: {available_views}")
488
+
489
+ main_cam_config = json.load(open(f"utils/cameras/{args.main_renderer}.json"))
490
+ main_renderer = MeshRenderer3D(args.device, image_size=args.img_size, cam_params=main_cam_config)
491
+
492
+ additional_views = [view.strip() for view in args.additional_renderers.split(',') if view.strip()]
493
+ if len(additional_views) > 3:
494
+ print(f"Warning: Only first 3 additional renderers will be used. Got: {additional_views}")
495
+ additional_views = additional_views[:3]
496
+
497
+ additional_renderers = {}
498
+ for view_name in additional_views:
499
+ if view_name in available_views and view_name != args.main_renderer:
500
+ cam_config = json.load(open(f"utils/cameras/{view_name}.json"))
501
+ renderer = MeshRenderer3D(args.device, image_size=args.img_size, cam_params=cam_config)
502
+ additional_renderers[f"{view_name}_renderer"] = renderer
503
+ elif view_name == args.main_renderer:
504
+ print(f"Warning: '{view_name}' is already the main renderer, skipping...")
505
+ elif view_name not in available_views:
506
+ print(f"Warning: Camera view '{view_name}' not found, skipping...")
507
+
508
+ return main_renderer, additional_renderers
509
+
510
+ def get_parser():
511
+ """Create argument parser with all configuration options."""
512
+
513
+ parser = argparse.ArgumentParser(description="3D Rigging Optimization")
514
+
515
+ # Training parameters
516
+ training_group = parser.add_argument_group('Training')
517
+ training_group.add_argument("--iter", type=int, default=500, help="Number of training iterations")
518
+ training_group.add_argument("--img_size", type=int, default=512, help="Image resolution")
519
+ training_group.add_argument("--device", type=str, default="cuda:0", help="Device to use")
520
+ training_group.add_argument("--img_fps", type=int, default=15, help="Image frame rate")
521
+ training_group.add_argument('--main_renderer', type=str, default='front', help='Main renderer camera view (default: front)')
522
+ training_group.add_argument('--additional_renderers', type=str, default="back, right, left", help='Additional renderer views (max 3), comma-separated (e.g., "back,left,right"). ')
523
+
524
+ # Learning rates
525
+ lr_group = parser.add_argument_group('Learning Rates')
526
+ lr_group.add_argument("--lr", type=float, default=2e-3, help="Base learning rate")
527
+ lr_group.add_argument("--min_lr", type=float, default=1e-5, help="Minimum learning rate")
528
+ lr_group.add_argument("--warm_lr", type=float, default=1e-5, help="Warmup learning rate")
529
+
530
+ # Loss weights
531
+ loss_group = parser.add_argument_group('Loss Weights')
532
+ loss_group.add_argument("--smooth_weight", type=float, default=0.2)
533
+ loss_group.add_argument("--root_smooth_weight", type=float, default=1.0)
534
+ loss_group.add_argument("--coherence_weight", type=float, default=10)
535
+ loss_group.add_argument("--rgb_wt", type=float, default=1.0, help="RGB loss target ratio (relative importance)")
536
+ loss_group.add_argument("--mask_wt", type=float, default=1.0, help="Mask loss target ratio")
537
+ loss_group.add_argument("--proj_joint_wt", type=float, default=1.5, help="Joint projection loss target ratio")
538
+ loss_group.add_argument("--proj_vert_wt", type=float, default=3.0, help="Point projection loss target ratio")
539
+ loss_group.add_argument("--depth_wt", type=float, default=0.8, help="Depth loss target ratio")
540
+ loss_group.add_argument("--flow_wt", type=float, default=0.8, help="Flow loss target ratio")
541
+
542
+ # Data and output
543
+ data_group = parser.add_argument_group('Data and Output')
544
+ data_group.add_argument("--input_path", type=str, default="inputs")
545
+ data_group.add_argument("--save_path", type=str, default="results")
546
+ data_group.add_argument("--save_name", type=str, default="results")
547
+ data_group.add_argument("--seq_name", type=str, default=None)
548
+
549
+ # Flags
550
+ flag_group = parser.add_argument_group('Flags')
551
+ flag_group.add_argument('--gauss_filter', action='store_true', default=False)
552
+ return parser
553
+
554
+ def main():
555
+ parser = get_parser()
556
+ args = parser.parse_args()
557
+
558
+ # Setup output directory
559
+ out_dir = f'{args.save_path}/{args.seq_name}/{args.save_name}'
560
+ save_args(args, out_dir)
561
+
562
+ # Initialize optimizer
563
+ ani_optimizer = AnimationOptimizer(args, device=args.device)
564
+
565
+ # Setup renderers
566
+ renderer, additional_renderers = setup_renderers(args)
567
+
568
+ # Load and prepare data
569
+ model, images_batch, flow_dirs, img_path = load_and_prepare_data(args)
570
+
571
+ # Setup tracking
572
+ joint_vis_mask = visualize_joints_on_mesh(model, renderer, args.seq_name, out_dir=out_dir)
573
+ joint_vis_mask = torch.from_numpy(joint_vis_mask).float().to(args.device)
574
+
575
+ joint_project_2d = renderer.project_points(model.joints_rest)
576
+
577
+ # Setup track paths
578
+ track_2d_path = img_path.replace('imgs', 'track_2d_joints')
579
+ os.makedirs(track_2d_path, exist_ok=True)
580
+
581
+ # Load or generate tracks
582
+ if not os.listdir(track_2d_path):
583
+ print("Generating joint tracks")
584
+ tracked_joints_2d = save_track(args.seq_name, joint_project_2d, img_path, track_2d_path, out_dir)
585
+ else:
586
+ print("Loading existing joint tracks")
587
+ tracked_joints_2d = np.load(f'{track_2d_path}/pred_tracks.npy')
588
+
589
+ # Setup point tracking
590
+ vert_vis_mask = visualize_points_on_mesh(model, renderer, args.seq_name, out_dir=out_dir)
591
+ vert_vis_mask = torch.from_numpy(vert_vis_mask).float().to(args.device)
592
+
593
+ track_verts_2d, track_indices, sampled_vertex_indices = save_track_points(
594
+ vert_vis_mask, renderer, model, img_path, out_dir, args
595
+ )
596
+ vert_vis_mask = vert_vis_mask[sampled_vertex_indices]
597
+
598
+ # Run optimization
599
+ print("Starting optimization")
600
+ final_quats, root_quats, root_pos = ani_optimizer.optimization(
601
+ images_batch=images_batch,
602
+ model=model,
603
+ renderer=renderer,
604
+ tracked_joints_2d=tracked_joints_2d,
605
+ joint_vis_mask=joint_vis_mask,
606
+ track_verts_2d=track_verts_2d,
607
+ vert_vis_mask=vert_vis_mask,
608
+ sampled_vertex_indices=sampled_vertex_indices,
609
+ track_indices=track_indices,
610
+ flow_dirs=flow_dirs,
611
+ n_iters=args.iter,
612
+ out_dir=out_dir
613
+ )
614
+
615
+ # Save results
616
+ save_and_smooth_results(
617
+ args, model, renderer, final_quats, root_quats, root_pos,
618
+ out_dir, additional_renderers, fps=10
619
+ )
620
+
621
+ print("Optimization completed successfully")
622
+ save_final_video(args)
623
+
624
+
625
+ if __name__ == "__main__":
626
+ main()
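A small standalone illustration of the loss-weight pre-calibration performed at iteration 0 above: each auxiliary weight is rescaled so its weighted loss starts at rgb_loss * target_ratio. The numbers below are toy values, not results from a real run.

# Toy illustration of pre_calibrate_loss_weights.
target_ratios = {'flow': 0.8, 'depth': 0.8, 'proj_joint': 1.5}
loss_weights  = {'flow': 1.0, 'depth': 1.0, 'proj_joint': 1.0}
losses = {'rgb': 0.04, 'flow': 2.0, 'depth': 0.5, 'proj_joint': 10.0}

rgb = losses['rgb']
for name, val in losses.items():
    if name == 'rgb' or val <= 1e-8:
        continue
    loss_weights[name] *= (rgb / val) * target_ratios.get(name, 1.0)

# flow:       (0.04 / 2.0)  * 0.8 = 0.016 -> weighted loss 0.032 = 0.04 * 0.8
# depth:      (0.04 / 0.5)  * 0.8 = 0.064 -> weighted loss 0.032 = 0.04 * 0.8
# proj_joint: (0.04 / 10.0) * 1.5 = 0.006 -> weighted loss 0.060 = 0.04 * 1.5
print(loss_weights)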
third_party/Puppeteer/animation/renderer.py ADDED
@@ -0,0 +1,348 @@
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import torch
17
+ import cv2
18
+
19
+ from pytorch3d.structures import join_meshes_as_scene, join_meshes_as_batch, Meshes
20
+ from pytorch3d.renderer import (
21
+ FoVPerspectiveCameras, look_at_view_transform,
22
+ RasterizationSettings, MeshRenderer, MeshRasterizer,
23
+ SoftPhongShader, PointLights, BlendParams, SoftSilhouetteShader
24
+ )
25
+ from utils.loss_utils import compute_visibility_mask_igl
26
+
27
+ def create_camera_from_blender_params(cam_params, device):
28
+ """
29
+ Convert Blender camera parameters to PyTorch3D camera
30
+
31
+ Args:
32
+ cam_params (dict): Camera parameters from Blender JSON
33
+ device: Device to create camera on
34
+
35
+ Returns:
36
+ FoVPerspectiveCameras: Converted camera
37
+ """
38
+ # Extract matrix world and convert to rotation and translation
39
+ matrix_world = torch.tensor(cam_params['matrix_world'], dtype=torch.float32)
40
+
41
+ # Extract field of view (use x_fov, assuming symmetric FOV)
42
+ fov = cam_params['x_fov'] * 180 / np.pi # Convert radians to degrees
43
+
44
+ rotation_matrix = torch.tensor([
45
+ [1, 0, 0, 0],
46
+ [0, 0, 1, 0],
47
+ [0, -1, 0, 0],
48
+ [0, 0, 0, 1]
49
+ ], dtype=torch.float32)
50
+
51
+ # Apply transformations
52
+ adjusted_matrix = rotation_matrix @ matrix_world
53
+ world2cam_matrix_tensor = torch.linalg.inv(adjusted_matrix)
54
+
55
+ aligned_matrix = torch.tensor([
56
+ [-1.0, 0.0, 0.0, 0.0],
57
+ [0.0, 1.0, 0.0, 0.0],
58
+ [0.0, 0.0, -1.0, 0.0],
59
+ [0.0, 0.0, 0.0, 1.0]
60
+ ], dtype=torch.float32, device=device)
61
+ world2cam_matrix = aligned_matrix @ world2cam_matrix_tensor.to(device)
62
+ cam2world_matrix = torch.linalg.inv(world2cam_matrix)
63
+
64
+ # Extract rotation and translation
65
+ R = cam2world_matrix[:3, :3]
66
+ T = torch.tensor([
67
+ world2cam_matrix[0, 3],
68
+ world2cam_matrix[1, 3],
69
+ world2cam_matrix[2, 3]
70
+ ], device=device, dtype=torch.float32)
71
+
72
+ return FoVPerspectiveCameras(
73
+ device=device,
74
+ fov=fov,
75
+ R=R[None],
76
+ T=T[None],
77
+ znear=0.1,
78
+ zfar=100.0
79
+ )
80
+
81
+ class MeshRenderer3D:
82
+ """
83
+ PyTorch3D mesh renderer with support for various rendering modes.
84
+
85
+ Features:
86
+ - Standard mesh rendering with Phong shading
87
+ - Silhouette rendering
88
+ - Multi-frame batch rendering
89
+ - Point projection with visibility computation
90
+ """
91
+ def __init__(self, device, image_size=1024, cam_params=None, light_params=None, raster_params=None):
92
+ self.device = device
93
+ # Initialize camera
94
+ self.camera = self._setup_camera(cam_params)
95
+
96
+ # Initialize light
97
+ self.light = self._setup_light(light_params)
98
+
99
+ # Initialize rasterization settings
100
+ self.raster_settings = self._setup_raster_settings(raster_params, image_size)
101
+ self.camera.image_size = self.raster_settings.image_size
102
+
103
+ # Initialize renderers
104
+ self._setup_renderers()
105
+
106
+ def _setup_camera(self, cam_params):
107
+ """Setup camera based on parameters."""
108
+ if cam_params is None:
109
+ # Default camera
110
+ R, T = look_at_view_transform(3.0, 30, 20, at=[[0.0, 1.0, 0.0]])
111
+ return FoVPerspectiveCameras(device=self.device, R=R, T=T)
112
+
113
+ # Check if Blender parameters
114
+ if "matrix_world" in cam_params and "x_fov" in cam_params:
115
+ return create_camera_from_blender_params(cam_params, self.device)
116
+ else:
117
+ raise ValueError("Need to provide blender parameters.")
118
+
119
+ def _setup_light(self, light_params):
120
+ """Setup light source."""
121
+ if light_params is None:
122
+ return PointLights(device=self.device, location=[[0.0, 0.0, 3.0]])
123
+
124
+ location = [[
125
+ light_params.get('light_x', 0.0),
126
+ light_params.get('light_y', 0.0),
127
+ light_params.get('light_z', 3.0)
128
+ ]]
129
+ return PointLights(device=self.device, location=location)
130
+
131
+ def _setup_raster_settings(self, raster_params, default_size):
132
+ """Setup rasterization settings."""
133
+ if raster_params is None:
134
+ raster_params = {
135
+ "image_size": [default_size, default_size],
136
+ "blur_radius": 0.0,
137
+ "faces_per_pixel": 1,
138
+ "bin_size": 0,
139
+ "cull_backfaces": False
140
+ }
141
+
142
+ return RasterizationSettings(**raster_params)
143
+
144
+ def _setup_renderers(self) -> None:
145
+ """Initialize main and silhouette renderers."""
146
+ rasterizer = MeshRasterizer(
147
+ cameras=self.camera,
148
+ raster_settings=self.raster_settings
149
+ )
150
+
151
+ # Main renderer with Phong shading
152
+ self.renderer = MeshRenderer(
153
+ rasterizer=rasterizer,
154
+ shader=SoftPhongShader(
155
+ device=self.device,
156
+ cameras=self.camera,
157
+ lights=self.light
158
+ )
159
+ )
160
+
161
+ # Silhouette renderer
162
+ blend_params = BlendParams(
163
+ sigma=1e-4,
164
+ gamma=1e-4,
165
+ background_color=(0.0, 0.0, 0.0)
166
+ )
167
+
168
+ self.silhouette_renderer = MeshRenderer(
169
+ rasterizer=rasterizer,
170
+ shader=SoftSilhouetteShader(blend_params=blend_params)
171
+ )
172
+
173
+ def render(self, meshes):
174
+ """
175
+ Render meshes with Phong shading.
176
+
177
+ Args:
178
+ meshes: Single mesh or list of meshes
179
+
180
+ Returns:
181
+ Rendered images tensor of shape (1, H, W, C)
182
+ """
183
+ scene_mesh = self._prepare_scene_mesh(meshes)
184
+ return self.renderer(scene_mesh)
185
+
186
+ def render_batch(self, mesh_list):
187
+ """
188
+ Render multiple frames as a batch.
189
+
190
+ Args:
191
+ mesh_list: List of mesh lists (one per frame)
192
+
193
+ Returns:
194
+ Batch of rendered images of shape (B, H, W, C)
195
+ """
196
+ assert isinstance(mesh_list, list)
197
+
198
+ batch_meshes = []
199
+ for frame_meshes in mesh_list:
200
+ scene_mesh = self._prepare_scene_mesh(frame_meshes)
201
+ batch_meshes.append(scene_mesh)
202
+
203
+ batch_mesh = join_meshes_as_batch(batch_meshes)
204
+ return self.renderer(batch_mesh)
205
+
206
+ def get_rasterization_fragments(self, mesh_list):
207
+ """
208
+ Get rasterization fragments for batch of meshes.
209
+
210
+ Args:
211
+ mesh_list: List of mesh lists (one per frame)
212
+
213
+ Returns:
214
+ Rasterization fragments
215
+ """
216
+ assert isinstance(mesh_list, list)
217
+
218
+ batch_meshes = []
219
+ for frame_meshes in mesh_list:
220
+ scene_mesh = self._prepare_scene_mesh(frame_meshes)
221
+ batch_meshes.append(scene_mesh)
222
+
223
+ batch_mesh = join_meshes_as_batch(batch_meshes)
224
+ return self.renderer.rasterizer(batch_mesh)
225
+
226
+ def render_silhouette_batch(self, mesh_list):
227
+ """
228
+ Render silhouette masks for multiple frames.
229
+
230
+ Args:
231
+ mesh_list: List of mesh lists (one per frame)
232
+
233
+ Returns:
234
+ Batch of silhouette masks of shape (B, H, W, 1)
235
+ """
236
+ assert isinstance(mesh_list, list)
237
+
238
+ batch_meshes = []
239
+ for frame_meshes in mesh_list:
240
+ scene_mesh = self._prepare_scene_mesh(frame_meshes)
241
+ batch_meshes.append(scene_mesh)
242
+
243
+ batch_mesh = join_meshes_as_batch(batch_meshes)
244
+ silhouette = self.silhouette_renderer(batch_mesh)
245
+ return silhouette[..., 3:] # Return alpha channel
246
+
247
+ def tensor_to_image(self, tensor):
248
+ """
249
+ Convert rendered tensor to numpy image array.
250
+
251
+ Args:
252
+ tensor: Rendered tensor of shape (B, H, W, C)
253
+
254
+ Returns:
255
+ Numpy array of shape (H, W, 3) with values in [0, 255]
256
+ """
257
+ return (tensor[0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
258
+
259
+ def project_points(self, points_3d):
260
+ """
261
+ Project 3D joints/vertices to 2D image plane
262
+
263
+ Args:
264
+ points_3d: shape (N, 3) or (B, N, 3) tensor of 3D points
265
+
266
+ Returns:
267
+ points_2d: shape (N, 2) or (B, N, 2) tensor of 2D projected points
268
+ """
269
+ if not torch.is_tensor(points_3d):
270
+ points_3d = torch.tensor(points_3d, device=self.device, dtype=torch.float32)
271
+
272
+
273
+ if len(points_3d.shape) == 2:
274
+ points_3d = points_3d.unsqueeze(0) # (1, N, 3)
275
+
276
+ # project points
277
+ projected = self.camera.transform_points_screen(points_3d, image_size=self.raster_settings.image_size)
278
+
279
+ if projected.shape[0] == 1:
280
+ projected_points = projected.squeeze(0)[:, :2]
281
+ else:
282
+ projected_points = projected[:, :, :2]
283
+ return projected_points
284
+
285
+ def render_with_points(self, meshes, points_3d, point_radius=3, for_vertices=False):
286
+ """
287
+ Render the mesh and draw the projected joints/vertices on the image.
288
+
289
+ Args:
290
+ meshes: mesh or list of meshes to be rendered
291
+ points_3d: shape (N, 3) tensor of 3D joints/vertices
292
+ point_radius: radius of the drawn points
293
+ for_vertices: if True, compute visibility for vertices, else for joints
294
+
295
+ Returns:
296
+ Image with joints/vertices drawn, visibility mask
297
+ """
298
+ rendered_image = self.render(meshes)
299
+
300
+ # project 3D points to 2D
301
+ points_2d = self.project_points(points_3d)
302
+
303
+ image_np = rendered_image[0, ..., :3].cpu().numpy()
304
+ image_with_points = image_np.copy()
305
+ height, width = image_np.shape[:2]
306
+
307
+ ray_origins = self.camera.get_camera_center() # (B, 3)
308
+ ray_origins = np.tile(ray_origins.detach().cpu().numpy(), (points_3d.shape[0], 1))
309
+
310
+ verts = meshes.verts_packed().detach().cpu().numpy()
311
+ faces = meshes.faces_packed().detach().cpu().numpy()
312
+
313
+ ray_dirs = points_3d.detach().cpu().numpy() - ray_origins # calculate ray directions
314
+ distances = np.linalg.norm(ray_dirs, axis=1) # distances from camera to points
315
+ ray_dirs = (ray_dirs.T / distances).T # normalize to unit vectors
316
+
317
+ vis_mask = compute_visibility_mask_igl(ray_origins, ray_dirs, distances, verts, faces, distance_tolerance=1e-6, for_vertices=for_vertices)
318
+
319
+ # draw points
320
+ visible_color=(1, 0, 0) # visible points are red
321
+ invisible_color=(0, 0, 1) # invisible points are blue
322
+ for i, point in enumerate(points_2d):
323
+ x, y = int(point[0].item()), int(point[1].item())
324
+
325
+ if 0 <= x < width and 0 <= y < height:
326
+ point_color = visible_color if vis_mask[i] else invisible_color
327
+ cv2.circle(image_with_points, (x, y), point_radius, point_color, -1)
328
+
329
+ result = torch.from_numpy(image_with_points).to(self.device)
330
+ result = result.unsqueeze(0)
331
+
332
+ if rendered_image.shape[-1] == 4:
333
+ alpha = rendered_image[..., 3:]
334
+ result = torch.cat([result, alpha], dim=-1)
335
+
336
+ return result, vis_mask
337
+
338
+ def _prepare_scene_mesh(self, meshes):
339
+ """Convert meshes to a single scene mesh."""
340
+ if isinstance(meshes, Meshes):
341
+ return meshes
342
+ elif isinstance(meshes, list):
343
+ return join_meshes_as_scene(meshes)
344
+ else:
345
+ raise ValueError("meshes must be Meshes object or list of Meshes")
346
+
347
+
348
+
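A minimal sketch of how MeshRenderer3D is driven elsewhere in this commit (camera JSON from utils/cameras/, optional batch rendering, joint projection); the joint tensor here is a random placeholder.

import json
import torch

from renderer import MeshRenderer3D

device = "cuda:0"
cam_params = json.load(open("utils/cameras/front.json"))
renderer = MeshRenderer3D(device, image_size=512, cam_params=cam_params)

# With an animated RiggingModel `model` over T frames one would render:
#   images = renderer.render_batch([model.get_mesh(t) for t in range(T)])            # (T, H, W, 4)
#   masks  = renderer.render_silhouette_batch([model.get_mesh(t) for t in range(T)]) # (T, H, W, 1)

joints_3d = torch.rand(20, 3, device=device) - 0.5  # placeholder joints
joints_2d = renderer.project_points(joints_3d)      # (20, 2) pixel coordinates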
third_party/Puppeteer/animation/utils/cameras/back.json ADDED
@@ -0,0 +1 @@
1
+ {"matrix_world": [[-1.0, -8.742277657347586e-08, -8.742277657347586e-08, 0.0], [-8.742277657347586e-08, 4.371138118131057e-08, 1.0, 2.0], [-8.74227694680485e-08, 1.0, -4.371138828673793e-08, 0.0], [0.0, 0.0, 0.0, 1.0]], "format_version": 6, "max_depth": 5.0, "bbox": [[-0.14632226526737213, -0.15228690207004547, -0.5013949275016785], [0.18149489164352417, 0.24675098061561584, 0.4873228073120117]], "origin": [0.0, 2.0, 0.0], "x_fov": 0.6911112070083618, "y_fov": 0.6911112070083618, "x": [-1.0, -8.742277657347586e-08, -8.74227694680485e-08], "y": [8.742277657347586e-08, -4.371138118131057e-08, -1.0], "z": [8.742277657347586e-08, -1.0, 4.371138828673793e-08]}
third_party/Puppeteer/animation/utils/cameras/back_left.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "matrix_world": [
3
+ [
4
+ -0.7071067235853873,
5
+ -1.5015359289272112e-08,
6
+ -0.7071068387877032,
7
+ -1.4142136775754064
8
+ ],
9
+ [
10
+ -0.7071068387877031,
11
+ -1.1886282763606815e-08,
12
+ 0.7071067235853874,
13
+ 1.4142134471707748
14
+ ],
15
+ [
16
+ -1.9022333375140477e-08,
17
+ 1.0,
18
+ -2.2125928034189e-09,
19
+ -4.4251856068378e-09
20
+ ],
21
+ [
22
+ 0.0,
23
+ 0.0,
24
+ 0.0,
25
+ 1.0
26
+ ]
27
+ ],
28
+ "format_version": 6,
29
+ "max_depth": 5.0,
30
+ "bbox": [
31
+ [
32
+ -0.14632226526737213,
33
+ -0.15228690207004547,
34
+ -0.5013949275016785
35
+ ],
36
+ [
37
+ 0.18149489164352417,
38
+ 0.24675098061561584,
39
+ 0.4873228073120117
40
+ ]
41
+ ],
42
+ "origin": [
43
+ -1.0,
44
+ 1.0,
45
+ 0.0
46
+ ],
47
+ "x_fov": 0.6911112070083618,
48
+ "y_fov": 0.6911112070083618,
49
+ "x": [
50
+ -0.7071067235853873,
51
+ -1.5015359289272112e-08,
52
+ -0.7071068387877032
53
+ ],
54
+ "y": [
55
+ -0.7071068387877031,
56
+ -1.1886282763606815e-08,
57
+ 0.7071067235853874
58
+ ],
59
+ "z": [
60
+ -1.9022333375140477e-08,
61
+ 1.0,
62
+ -2.2125928034189e-09
63
+ ]
64
+ }
third_party/Puppeteer/animation/utils/cameras/back_right.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "matrix_world": [
3
+ [
4
+ -0.7071067854026265,
5
+ -7.240741267677819e-08,
6
+ 0.7071067769704649,
7
+ 1.4142135539409297
8
+ ],
9
+ [
10
+ 0.7071067769704653,
11
+ 2.4325415404202744e-08,
12
+ 0.7071067854026294,
13
+ 1.4142135708052588
14
+ ],
15
+ [
16
+ -6.840043892397674e-08,
17
+ 0.9999999999999971,
18
+ 3.399910591950217e-08,
19
+ 6.799821183900434e-08
20
+ ],
21
+ [
22
+ 0.0,
23
+ 0.0,
24
+ 0.0,
25
+ 1.0
26
+ ]
27
+ ],
28
+ "format_version": 6,
29
+ "max_depth": 5.0,
30
+ "bbox": [
31
+ [
32
+ -0.14632226526737213,
33
+ -0.15228690207004547,
34
+ -0.5013949275016785
35
+ ],
36
+ [
37
+ 0.18149489164352417,
38
+ 0.24675098061561584,
39
+ 0.4873228073120117
40
+ ]
41
+ ],
42
+ "origin": [
43
+ 1.0,
44
+ 1.0,
45
+ 0.0
46
+ ],
47
+ "x_fov": 0.6911112070083618,
48
+ "y_fov": 0.6911112070083618,
49
+ "x": [
50
+ -0.7071067854026265,
51
+ -7.240741267677819e-08,
52
+ 0.7071067769704649
53
+ ],
54
+ "y": [
55
+ 0.7071067769704653,
56
+ 2.4325415404202744e-08,
57
+ 0.7071067854026294
58
+ ],
59
+ "z": [
60
+ -6.840043892397674e-08,
61
+ 0.9999999999999971,
62
+ 3.399910591950217e-08
63
+ ]
64
+ }
third_party/Puppeteer/animation/utils/cameras/front.json ADDED
@@ -0,0 +1 @@
1
+ {"matrix_world": [[1.0, 0.0, 0.0, 0.0], [0.0, -4.371138828673793e-08, -1.0, -2.0], [0.0, 1.0, -4.371138828673793e-08, 0.0], [0.0, 0.0, 0.0, 1.0]], "format_version": 6, "max_depth": 5.0, "bbox": [[-0.14632226526737213, -0.15228690207004547, -0.5013949275016785], [0.18149489164352417, 0.24675098061561584, 0.4873228073120117]], "origin": [0.0, -2.0, 0.0], "x_fov": 0.6911112070083618, "y_fov": 0.6911112070083618, "x": [1.0, 0.0, 0.0], "y": [-0.0, 4.371138828673793e-08, -1.0], "z": [-0.0, 1.0, 4.371138828673793e-08]}
third_party/Puppeteer/animation/utils/cameras/front_left.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "matrix_world": [
3
+ [
4
+ 0.7071068078790848,
5
+ 2.869602372390645e-08,
6
+ -0.7071067544940086,
7
+ -1.4142135089880172
8
+ ],
9
+ [
10
+ -0.7071067544940088,
11
+ -6.21956508517485e-09,
12
+ -0.7071068078790852,
13
+ -1.4142136157581704
14
+ ],
15
+ [
16
+ -2.468905024866075e-08,
17
+ 0.9999999999999996,
18
+ 1.589325537842967e-08,
19
+ 3.178651075685934e-08
20
+ ],
21
+ [
22
+ 0.0,
23
+ 0.0,
24
+ 0.0,
25
+ 1.0
26
+ ]
27
+ ],
28
+ "format_version": 6,
29
+ "max_depth": 5.0,
30
+ "bbox": [
31
+ [
32
+ -0.14632226526737213,
33
+ -0.15228690207004547,
34
+ -0.5013949275016785
35
+ ],
36
+ [
37
+ 0.18149489164352417,
38
+ 0.24675098061561584,
39
+ 0.4873228073120117
40
+ ]
41
+ ],
42
+ "origin": [
43
+ -1.0,
44
+ -1.0,
45
+ 0.0
46
+ ],
47
+ "x_fov": 0.6911112070083618,
48
+ "y_fov": 0.6911112070083618,
49
+ "x": [
50
+ 0.7071068078790848,
51
+ 2.869602372390645e-08,
52
+ -0.7071067544940086
53
+ ],
54
+ "y": [
55
+ -0.7071067544940088,
56
+ -6.21956508517485e-09,
57
+ -0.7071068078790852
58
+ ],
59
+ "z": [
60
+ -2.468905024866075e-08,
61
+ 0.9999999999999996,
62
+ 1.589325537842967e-08
63
+ ]
64
+ }
third_party/Puppeteer/animation/utils/cameras/front_right.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "matrix_world": [
3
+ [
4
+ 0.7071068078790848,
5
+ -2.869602372390645e-08,
6
+ 0.7071067544940086,
7
+ 1.4142135089880172
8
+ ],
9
+ [
10
+ 0.7071067544940088,
11
+ -6.21956508517485e-09,
12
+ -0.7071068078790852,
13
+ -1.4142136157581704
14
+ ],
15
+ [
16
+ 2.468905024866075e-08,
17
+ 0.9999999999999996,
18
+ 1.589325537842967e-08,
19
+ 3.178651075685934e-08
20
+ ],
21
+ [
22
+ 0.0,
23
+ 0.0,
24
+ 0.0,
25
+ 1.0
26
+ ]
27
+ ],
28
+ "format_version": 6,
29
+ "max_depth": 5.0,
30
+ "bbox": [
31
+ [
32
+ -0.14632226526737213,
33
+ -0.15228690207004547,
34
+ -0.5013949275016785
35
+ ],
36
+ [
37
+ 0.18149489164352417,
38
+ 0.24675098061561584,
39
+ 0.4873228073120117
40
+ ]
41
+ ],
42
+ "origin": [
43
+ 1.0,
44
+ -1.0,
45
+ 0.0
46
+ ],
47
+ "x_fov": 0.6911112070083618,
48
+ "y_fov": 0.6911112070083618,
49
+ "x": [
50
+ 0.7071068078790848,
51
+ -2.869602372390645e-08,
52
+ 0.7071067544940086
53
+ ],
54
+ "y": [
55
+ 0.7071067544940088,
56
+ -6.21956508517485e-09,
57
+ -0.7071068078790852
58
+ ],
59
+ "z": [
60
+ 2.468905024866075e-08,
61
+ 0.9999999999999996,
62
+ 1.589325537842967e-08
63
+ ]
64
+ }
third_party/Puppeteer/animation/utils/cameras/left.json ADDED
@@ -0,0 +1 @@
1
+ {"matrix_world": [[7.549790126404332e-08, 7.549790126404332e-08, -1.0, -2.0], [-1.0, 5.6999328827277325e-15, -7.549790126404332e-08, 0.0], [0.0, 1.0, 7.549790126404332e-08, 0.0], [0.0, 0.0, 0.0, 1.0]], "format_version": 6, "max_depth": 5.0, "bbox": [[-0.14632226526737213, -0.15228690207004547, -0.5013949275016785], [0.18149489164352417, 0.24675098061561584, 0.4873228073120117]], "origin": [-2.0, 0.0, 0.0], "x_fov": 0.6911112070083618, "y_fov": 0.6911112070083618, "x": [7.549790126404332e-08, -1.0, 0.0], "y": [-7.549790126404332e-08, -5.6999328827277325e-15, -1.0], "z": [1.0, 7.549790126404332e-08, -7.549790126404332e-08]}
third_party/Puppeteer/animation/utils/cameras/right.json ADDED
@@ -0,0 +1 @@
1
+ {"matrix_world": [[7.549790126404332e-08, -7.549790126404332e-08, 1.0, 2.0], [1.0, 5.6999328827277325e-15, -7.549790126404332e-08, 0.0], [0.0, 1.0, 7.549790126404332e-08, 0.0], [0.0, 0.0, 0.0, 1.0]], "format_version": 6, "max_depth": 5.0, "bbox": [[-0.14632226526737213, -0.15228690207004547, -0.5013949275016785], [0.18149489164352417, 0.24675098061561584, 0.4873228073120117]], "origin": [2.0, 0.0, 0.0], "x_fov": 0.6911112070083618, "y_fov": 0.6911112070083618, "x": [7.549790126404332e-08, 1.0, 0.0], "y": [7.549790126404332e-08, -5.6999328827277325e-15, -1.0], "z": [-1.0, 7.549790126404332e-08, -7.549790126404332e-08]}
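Of the keys stored in these camera JSON files, only matrix_world and x_fov are consumed by create_camera_from_blender_params in renderer.py; bbox, origin and the x/y/z axis vectors are metadata from the Blender export. A quick sanity-check sketch:

import json
from renderer import create_camera_from_blender_params

cam = json.load(open("utils/cameras/front.json"))
camera = create_camera_from_blender_params(cam, device="cuda:0")
print(float(camera.fov))  # the 0.6911 rad x_fov above converts to roughly 39.6 degrees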
third_party/Puppeteer/animation/utils/data_loader.py ADDED
@@ -0,0 +1,170 @@
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import numpy as np
17
+ import torch
18
+ import random
19
+ from pytorch3d.io import load_objs_as_meshes, load_obj
20
+ from pytorch3d.renderer import TexturesAtlas
21
+ from pytorch3d.structures import Meshes
22
+ from model import RiggingModel
23
+
24
+ def prepare_depth(depth_path, input_frames, device, depth_model):
25
+ os.makedirs(depth_path, exist_ok=True)
26
+ depth_path = f"{depth_path}/depth_gt_raw.pt"
27
+ if os.path.exists(depth_path):
28
+ print("load GT depth...")
29
+ depth_gt_raw = torch.load(depth_path, map_location=device)
30
+ else:
31
+ print("run VideoDepthAnything and save.")
32
+ with torch.no_grad():
33
+ depth_gt_raw = depth_model.get_depth_maps(input_frames)
34
+ torch.save(depth_gt_raw.cpu(), depth_path)
35
+ depth_gt_raw = depth_gt_raw.to(device)
36
+ return depth_gt_raw
37
+
38
+ def normalize_vertices(verts):
39
+ """Normalize vertices to a unit cube."""
40
+ vmin, vmax = verts.min(dim=0).values, verts.max(dim=0).values
41
+ center = (vmax + vmin) / 2.0
42
+ scale = (vmax - vmin).max()
43
+ verts_norm = (verts - center) / scale
44
+ return verts_norm, center, scale
45
+
46
+ def build_atlas_texture(obj_path, atlas_size, device):
47
+ """Load OBJ + materials and bake all textures into a single atlas."""
48
+ verts, faces, aux = load_obj(
49
+ obj_path,
50
+ device=device,
51
+ load_textures=True,
52
+ create_texture_atlas=True,
53
+ texture_atlas_size=atlas_size,
54
+ texture_wrap="repeat",
55
+ )
56
+ atlas = aux.texture_atlas # (F, R, R, 3)
57
+ verts_norm, _, _ = normalize_vertices(verts)
58
+ mesh_atlas = Meshes(
59
+ verts=[verts_norm],
60
+ faces=[faces.verts_idx],
61
+ textures=TexturesAtlas(atlas=[atlas]),
62
+ )
63
+ return mesh_atlas
64
+
65
+ def read_rig_file(file_path):
66
+ """
67
+ Read rig from txt file, our format is the same as RigNet:
68
+ joints joint_name x y z
69
+ root root_joint_name
70
+ skin vertex_idx joint_name weight joint_name weight ...
71
+ hier parent_joint_name child_joint_name
72
+ """
73
+ joints = []
74
+ bones = []
75
+ joint_names = []
76
+
77
+ joint_mapping = {}
78
+ joint_index = 0
79
+
80
+ skinning_data = {} # Dictionary to store vertex index -> [(joint_idx, weight), ...]
81
+
82
+ with open(file_path, 'r') as file:
83
+ lines = file.readlines()
84
+
85
+ for line in lines:
86
+ parts = line.split()
87
+ if line.startswith('joints'):
88
+ name = parts[1]
89
+ position = [float(parts[2]), float(parts[3]), float(parts[4])]
90
+ joints.append(position)
91
+ joint_names.append(name)
92
+ joint_mapping[name] = joint_index
93
+ joint_index += 1
94
+ elif line.startswith('hier'):
95
+ parent_joint = joint_mapping[parts[1]]
96
+ child_joint = joint_mapping[parts[2]]
97
+ bones.append([parent_joint, child_joint])
98
+ elif line.startswith('root'):
99
+ root = joint_mapping[parts[1]]
100
+ elif line.startswith('skin'):
101
+ vertex_idx = int(parts[1])
102
+
103
+ if vertex_idx not in skinning_data:
104
+ skinning_data[vertex_idx] = []
105
+
106
+ for i in range(2, len(parts), 2):
107
+ if i+1 < len(parts):
108
+ joint_name = parts[i]
109
+ weight = float(parts[i+1])
110
+
111
+ if joint_name in joint_mapping:
112
+ joint_idx = joint_mapping[joint_name]
113
+ skinning_data[vertex_idx].append((joint_idx, weight))
114
+
115
+ return np.array(joints), np.array(bones), root, joint_names, skinning_data
116
+
117
+ def load_model_from_obj_and_rig(
118
+ mesh_path: str,
119
+ rig_path: str,
120
+ device: str | torch.device = "cuda",
121
+ use_skin_color: bool = True,
122
+ atlas_size: int = 8,
123
+ ):
124
+ """Load a 3D model from OBJ and rig files."""
125
+
126
+ # 1) read raw mesh
127
+ raw_mesh = load_objs_as_meshes([mesh_path], device=device)
128
+ verts_raw = raw_mesh.verts_packed() # (V,3)
129
+ faces_idx = raw_mesh.faces_packed() # (F,3)
130
+
131
+ # 2) read rig data
132
+ joints_np, bones_np, root_idx, joint_names, skinning_data = read_rig_file(rig_path)
133
+ J = joints_np.shape[0]
134
+
135
+ # parent indices, default -1
136
+ parent_idx = [-1] * J
137
+ for p, c in bones_np:
138
+ parent_idx[c] = p
139
+
140
+ verts_norm, center, scale = normalize_vertices(verts_raw)
141
+ joints_t = torch.as_tensor(joints_np, dtype=torch.float32, device=device)
142
+ joints_norm = (joints_t - center) / scale
143
+
144
+ # skin weights tensor (V,J)
145
+ V = verts_raw.shape[0]
146
+ skin_weights = torch.zeros(V, J, dtype=torch.float32, device=device)
147
+ for v_idx, lst in skinning_data.items():
148
+ for j_idx, w in lst:
149
+ skin_weights[v_idx, j_idx] = w
150
+
151
+ # 3) texture strategy
152
+ mesh_norm = build_atlas_texture(mesh_path, atlas_size, device)
153
+ tex = mesh_norm.textures
154
+
155
+ # 4) pack into Model class
156
+ model = RiggingModel(device=device)
157
+ model.vertices = [mesh_norm.verts_packed()]
158
+ model.faces = [faces_idx]
159
+ model.textures = [tex]
160
+
161
+ # rig meta
162
+ model.bones = bones_np # (B,2)
163
+ model.parent_indices = parent_idx
164
+ model.root_index = root_idx
165
+ model.skin_weights = [skin_weights]
166
+
167
+ model.bind_matrices_inv = torch.eye(4, device=device).unsqueeze(0).expand(J, -1, -1).contiguous()
168
+ model.joints_rest = joints_norm
169
+
170
+ return model
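A tiny fabricated rig in the RigNet-style text layout that read_rig_file above expects (two joints, one bone, per-vertex skin weights); it only documents the line format and is not taken from the repository's sample data.

from utils.data_loader import read_rig_file

rig_txt = """\
joints pelvis 0.0 0.0 0.0
joints head 0.0 0.5 0.0
root pelvis
skin 0 pelvis 0.7 head 0.3
skin 1 head 1.0
hier pelvis head
"""

with open("/tmp/rig_example.txt", "w") as f:
    f.write(rig_txt)

joints, bones, root, names, skinning = read_rig_file("/tmp/rig_example.txt")
# joints.shape == (2, 3); bones -> [[0, 1]]; root == 0; names == ['pelvis', 'head']
# skinning[0] == [(0, 0.7), (1, 0.3)]; skinning[1] == [(1, 1.0)]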
third_party/Puppeteer/animation/utils/loss_utils.py ADDED
@@ -0,0 +1,420 @@
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from third_partys.Video_Depth_Anything.video_depth_anything.video_depth import VideoDepthAnything
16
+ import torch
17
+ import torch.nn as nn
18
+ import numpy as np
19
+ import igl
20
+ import cv2
21
+ import time
22
+ import torch.nn.functional as F
23
+ from utils.quat_utils import quat_inverse, quat_log, quat_multiply, normalize_quaternion
24
+ from pytorch3d.structures import join_meshes_as_scene, join_meshes_as_batch
25
+ import os
26
+ from pathlib import Path
27
+
28
+ class DepthModule:
29
+ def __init__(self, encoder='vitl', device='cuda', input_size=518, fp32=False):
30
+ """
31
+ Initialize the depth loss module with Video Depth Anything
32
+
33
+ Args:
34
+ encoder: 'vitl' or 'vits'
35
+ device: device to run the model on
36
+ input_size: input size for the model
37
+ fp32: whether to use float32 for inference
38
+ """
39
+ self.device = device if torch.cuda.is_available() else 'cpu'
40
+ self.input_size = input_size
41
+ self.fp32 = fp32
42
+
43
+ # Initialize model configuration
44
+ model_configs = {
45
+ 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
46
+ 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
47
+ }
48
+
49
+ # Load Video Depth Anything model
50
+ self.video_depth_model = VideoDepthAnything(**model_configs[encoder])
51
+ self.video_depth_model.load_state_dict(
52
+ torch.load(f'./third_partys/Video_Depth_Anything/ckpt/video_depth_anything_{encoder}.pth', map_location='cpu'),
53
+ strict=True
54
+ )
55
+ self.video_depth_model = self.video_depth_model.to(self.device).eval()
56
+ for param in self.video_depth_model.parameters():
57
+ param.requires_grad = False
58
+
59
+ def get_depth_maps(self, frames, target_fps=30):
60
+ """
61
+ Get depth maps for video frames
62
+ """
63
+ depths, _ = self.video_depth_model.infer_video_depth(
64
+ frames,
65
+ target_fps,
66
+ input_size=self.input_size,
67
+ device=self.device,
68
+ fp32=self.fp32
69
+ )
70
+ return depths
71
+
72
+ def save_depth_as_images(depth_np, output_dir='./depth_images'):
73
+ """save depth images"""
74
+ os.makedirs(output_dir, exist_ok=True)
75
+
76
+ for i, depth_map in enumerate(depth_np):
77
+ depth_map = depth_map.detach().cpu().numpy()
78
+ valid_mask = (depth_map > 0)
79
+ if not valid_mask.any():
80
+ continue
81
+
82
+ valid_min = depth_map[valid_mask].min()
83
+ valid_max = depth_map[valid_mask].max()
84
+
85
+ normalized = np.zeros_like(depth_map)
86
+ normalized[valid_mask] = 255.0 * (depth_map[valid_mask] - valid_min) / (valid_max - valid_min)
87
+
88
+ depth_img = normalized.astype(np.uint8)
89
+
90
+ cv2.imwrite(os.path.join(output_dir, f'depth_{i:04d}.png'), depth_img)
91
+
92
+ print(f"Save {len(depth_np)} depth images to {output_dir}")
93
+
94
+ def compute_visibility_mask_igl(ray_origins, ray_dirs, distances, verts, faces, distance_tolerance=1e-6, for_vertices=False):
95
+ """
96
+ Compute visibility mask using IGL ray-mesh intersection.
97
+ """
98
+ num_rays = ray_origins.shape[0]
99
+ visibility_mask = np.ones(num_rays, dtype=bool)
100
+
101
+ for i in range(num_rays):
102
+ ray_origin = ray_origins[i].reshape(1, 3)
103
+ ray_dir = ray_dirs[i].reshape(1, 3)
104
+ intersections = igl.ray_mesh_intersect(ray_origin, ray_dir, verts, faces)
105
+ if intersections:
106
+ # Count intersections that occur before the target point
107
+ count = sum(1 for h in intersections if h[4] < distances[i] - distance_tolerance)
108
+ # count=0 → ray completely missed the mesh; count=1 → ray stops exactly at the face containing the joint
109
+ # count>1 → ray was blocked by other faces along the way
110
+ if for_vertices:
111
+ if count != 1:
112
+ visibility_mask[i] = False
113
+ else: # for joints
114
+ if count > 2:
115
+ visibility_mask[i] = False
116
+
117
+ return visibility_mask
118
+
119
+ def compute_reprojection_loss(renderer, vis_mask, predicted_joints, tracked_joints_2d, image_size):
120
+ """
121
+ Compute reprojection loss between predicted 3D points and tracked 2D points.
122
+ """
123
+ if predicted_joints.dim() != 3:
124
+ raise ValueError(f"predicted_joints must be 3D tensor, got shape {predicted_joints.shape}")
125
+
126
+ B, J, _ = predicted_joints.shape
127
+ device = predicted_joints.device
128
+
129
+ # Project 3D joints to 2D screen coordinates
130
+ projected = renderer.camera.transform_points_screen(
131
+ predicted_joints,
132
+ image_size=[image_size, image_size]
133
+ )
134
+ projected_2d = projected[..., :2] # (B, J, 2)
135
+
136
+ # Convert and validate tracked joints
137
+ if not isinstance(tracked_joints_2d, torch.Tensor):
138
+ tracked_joints_2d = torch.from_numpy(tracked_joints_2d).float()
139
+ tracked_joints_2d = tracked_joints_2d.to(device)
140
+
141
+ if tracked_joints_2d.dim() == 2:
142
+ tracked_joints_2d = tracked_joints_2d.unsqueeze(0).expand(B, -1, -1)
143
+
144
+ vis_mask = vis_mask.to(device).float()
145
+
146
+ num_visible = vis_mask.sum()
147
+ if num_visible == 0:
148
+ # No visible joints - return zero loss
149
+ return torch.tensor(0.0, device=device, requires_grad=True)
150
+
151
+ squared_diff = (projected_2d - tracked_joints_2d).pow(2).sum(dim=-1) # (B, J)
152
+
153
+ vis_mask_expanded = vis_mask.unsqueeze(0) # (1, J)
154
+ masked_loss = squared_diff * vis_mask_expanded # (B, J)
155
+ per_frame_loss = masked_loss.sum(dim=1) / num_visible # (B,)
156
+ final_loss = per_frame_loss.mean() # scalar
157
+
158
+ return final_loss
159
+
160
+ def geodesic_loss(q1, q2, eps=1e-6):
161
+ """
162
+ Compute geodesic distance loss between batches of quaternions for rot smooth loss.
163
+ """
164
+ q1_norm = normalize_quaternion(q1, eps=eps)
165
+ q2_norm = normalize_quaternion(q2, eps=eps)
166
+
167
+ dot_product = (q1_norm * q2_norm).sum(dim=-1, keepdim=True)
168
+ q2_corrected = torch.where(dot_product < 0, -q2_norm, q2_norm)
169
+ inner_product = (q1_norm * q2_corrected).sum(dim=-1)
170
+
171
+ # Clamp to valid range for arccos to avoid numerical issues
172
+ inner_product_clamped = torch.clamp(inner_product, min=-1.0 + eps, max=1.0 - eps)
173
+ theta = 2.0 * torch.arccos(torch.abs(inner_product_clamped))
174
+
175
+ return theta
176
+
177
+ def root_motion_reg(root_quats, root_pos):
178
+ return ((root_pos[1:] - root_pos[:-1])**2).mean(), (geodesic_loss(root_quats[1:], root_quats[:-1])**2).mean()
179
+
180
+ def joint_motion_coherence(quats_normed, parent_idx):
181
+ """
182
+ Compute joint motion coherence loss to enforce smooth relative motion between parent-child joints.
183
+ """
184
+ coherence_loss = 0
185
+
186
+ for j, parent in enumerate(parent_idx):
187
+ if parent != -1: # Skip root joint
188
+ parent_rot = quats_normed[:, parent] # (T, 4)
189
+ child_rot = quats_normed[:, j] # (T, 4)
190
+
191
+ # Compute relative rotation of child w.r.t. parent's local frame
192
+ # local_rot = parent_rot^(-1) * child_rot
193
+ local_rot = quat_multiply(quat_inverse(parent_rot), child_rot)
194
+ local_rot_velocity = local_rot[1:] - local_rot[:-1] # (T-1, 4)
195
+
196
+ coherence_loss += local_rot_velocity.pow(2).mean()
197
+
198
+ return coherence_loss
199
+
200
+ def read_flo_file(file_path):
201
+ """
202
+ Read optical flow from .flo format file.
203
+ """
204
+ with open(file_path, 'rb') as f:
205
+ magic = np.fromfile(f, np.float32, count=1)
206
+ if len(magic) == 0 or magic[0] != 202021.25:
207
+ raise ValueError(f'Invalid .flo file format: magic number {magic}')
208
+
209
+ w = np.fromfile(f, np.int32, count=1)[0]
210
+ h = np.fromfile(f, np.int32, count=1)[0]
211
+ data = np.fromfile(f, np.float32, count=2*w*h)
212
+ flow = data.reshape(h, w, 2)
213
+ return flow
214
+
215
+ def load_optical_flows(flow_dir, num_frames):
216
+ """
217
+ Load sequence of optical flow files.
218
+ """
219
+ flow_dir = Path(flow_dir)
220
+ flows = []
221
+
222
+ for i in range(num_frames - 1):
223
+ flow_path = flow_dir / f'flow_{i:04d}.flo'
224
+ if flow_path.exists():
225
+ flow = read_flo_file(flow_path)
226
+ flows.append(flow)
227
+ else:
228
+ raise ValueError("No flow files found")
229
+
230
+ return np.stack(flows, axis=0)
231
+
232
+ def rasterize_vertex_flow(flow_vertices, meshes, faces, image_size, renderer, eps = 1e-8):
233
+ """
234
+ Rasterize per-vertex flow to dense flow field using barycentric interpolation.
235
+ """
236
+ B, V, _ = flow_vertices.shape
237
+ device = flow_vertices.device
238
+
239
+ if isinstance(image_size, int):
240
+ H = W = image_size
241
+ else:
242
+ H, W = image_size
243
+
244
+ batch_meshes = join_meshes_as_batch([join_meshes_as_scene(m) for m in meshes]).to(device)
245
+ fragments = renderer.renderer.rasterizer(batch_meshes)
246
+
247
+ pix_to_face = fragments.pix_to_face # (B, H, W, K)
248
+ bary_coords = fragments.bary_coords # (B, H, W, K, 3)
249
+
250
+ flow_scene_list = []
251
+ for mesh_idx in range(B):
252
+ mesh = meshes[mesh_idx]
253
+ V_mesh = mesh.verts_packed().shape[0]
254
+
255
+ if V_mesh > flow_vertices.shape[1]:
256
+ raise ValueError(f"Mesh {mesh_idx} has {V_mesh} vertices but flow has {flow_vertices.shape[1]}")
257
+
258
+ flow_scene_list.append(flow_vertices[mesh_idx, :V_mesh])
259
+
260
+
261
+ flow_vertices_scene = torch.cat(flow_scene_list, dim=0).to(device)
262
+ faces_scene = batch_meshes.faces_packed()
263
+
264
+ flow_pred = torch.zeros(B, H, W, 2, device=device)
265
+ valid = pix_to_face[..., 0] >= 0
266
+
267
+ for b in range(B):
268
+ b_valid = valid[b] # (H,W)
269
+ if torch.count_nonzero(b_valid) == 0:
270
+ print(f"No valid pixels found for batch {b}")
271
+ continue
272
+
273
+ valid_indices = torch.nonzero(b_valid, as_tuple=True)
274
+ h_indices, w_indices = valid_indices
275
+
276
+ face_idxs = pix_to_face[b, h_indices, w_indices, 0] # (N,)
277
+ bary = bary_coords[b, h_indices, w_indices, 0] # (N,3)
278
+
279
+ max_face_idx = faces_scene.shape[0] - 1
280
+ if face_idxs.max() > max_face_idx:
281
+ raise RuntimeError(f"Face index {face_idxs.max()} exceeds max {max_face_idx}")
282
+
283
+ face_verts = faces_scene[face_idxs] # (N, 3)
284
+ f0, f1, f2 = face_verts.unbind(-1) # Each (N,)
285
+
286
+ max_vert_idx = flow_vertices_scene.shape[0] - 1
287
+ if max(f0.max(), f1.max(), f2.max()) > max_vert_idx:
288
+ raise RuntimeError(f"Vertex index exceeds flow_vertices_scene size {max_vert_idx}")
289
+
290
+ v0_flow = flow_vertices_scene[f0] # (N, 2)
291
+ v1_flow = flow_vertices_scene[f1] # (N, 2)
292
+ v2_flow = flow_vertices_scene[f2] # (N, 2)
293
+
294
+ # Interpolate using barycentric coordinates
295
+ b0, b1, b2 = bary.unbind(-1) # Each (N,)
296
+
297
+ # Ensure barycentric coordinates sum to 1 (numerical stability)
298
+ bary_sum = b0 + b1 + b2
299
+ b0 = b0 / (bary_sum + eps)
300
+ b1 = b1 / (bary_sum + eps)
301
+ b2 = b2 / (bary_sum + eps)
302
+
303
+ flow_interpolated = (
304
+ b0.unsqueeze(-1) * v0_flow +
305
+ b1.unsqueeze(-1) * v1_flow +
306
+ b2.unsqueeze(-1) * v2_flow
307
+ ) # (N, 2)
308
+
309
+ # Update flow prediction
310
+ flow_pred[b, h_indices, w_indices] = flow_interpolated
311
+
312
+ return flow_pred
313
+
314
+ def calculate_flow_loss(flow_dir, device, mask, renderer, model):
315
+ """
316
+ Calculate optical flow loss with improved error handling and flexibility.
317
+ """
318
+ if device is None:
319
+ device = mask.device
320
+
321
+ T = mask.shape[0]
322
+ H, W = mask.shape[1:3]
323
+
324
+ if mask.shape[0] == T:
325
+ flow_mask = mask[1:] # Use frames 1 to T-1
326
+ else:
327
+ flow_mask = mask
328
+
329
+ flows_np = load_optical_flows(flow_dir, T)
330
+ flow_gt = torch.from_numpy(flows_np).float().to(device) # [T-1, H, W, 2]
331
+
332
+ vertices = model.deformed_vertices[0] # (T,V,3)
333
+ # Project vertices to get 2D flow
334
+ proj_t = renderer.project_points(vertices[:-1]) # (T-1,V,2) in pixels
335
+ proj_tp = renderer.project_points(vertices[1:])
336
+ vertex_flow = proj_tp - proj_t # (T-1,V,2) Δx,Δy
337
+
338
+ meshes = [model.get_mesh(t) for t in range(T)]
339
+ flow_pred = rasterize_vertex_flow(vertex_flow, meshes, model.faces[0], (H,W), renderer) # (B,H,W,2)
340
+
341
+ eps = 1e-3
342
+ diff = (flow_pred - flow_gt) * flow_mask.unsqueeze(-1) # (T-1, H, W, 2)
343
+ loss = torch.sqrt(diff.pow(2).sum(dim=-1) + eps**2) # Charbonnier loss
344
+ loss = loss.sum() / (flow_mask.sum() + 1e-6)
345
+
346
+ return loss
347
+
348
+ def normalize_depth_from_reference(depth_maps, reference_idx=0, invalid_value=-1.0, invert=False, eps = 1e-8):
349
+ """
350
+ Normalize depth maps based on a reference frame with improved robustness.
351
+ """
352
+ if depth_maps.dim() != 3:
353
+ raise ValueError(f"Expected depth_maps with 3 dimensions, got {depth_maps.dim()}")
354
+
355
+ T, H, W = depth_maps.shape
356
+ device = depth_maps.device
357
+
358
+ reference_depth = depth_maps[reference_idx]
359
+ valid_mask = (
360
+ (reference_depth != invalid_value) &
361
+ (reference_depth > 1e-8) & # Avoid very small positive values
362
+ torch.isfinite(reference_depth) # Exclude inf/nan
363
+ )
364
+
365
+ valid_values = reference_depth[valid_mask]
366
+ min_depth = torch.quantile(valid_values, 0.01) # 1st percentile
367
+ max_depth = torch.quantile(valid_values, 0.99) # 99th percentile
368
+
369
+ depth_range = max_depth - min_depth
370
+ if depth_range < eps:
371
+ print(f"Warning: very small depth range ({depth_range:.6f}), using fallback normalization")
372
+ min_depth = valid_values.min()
373
+ max_depth = valid_values.max()
374
+ depth_range = max(max_depth - min_depth, eps)
375
+
376
+ scale = 1.0 / (max_depth - min_depth)
377
+ offset = -min_depth * scale
378
+
379
+ all_valid_mask = (
380
+ (depth_maps != invalid_value) &
381
+ (depth_maps > eps) &
382
+ torch.isfinite(depth_maps)
383
+ )
384
+
385
+ normalized_depths = torch.full_like(depth_maps, invalid_value)
386
+
387
+ if all_valid_mask.any():
388
+ normalized_values = depth_maps[all_valid_mask] * scale + offset
389
+
390
+ if invert:
391
+ normalized_values = 1.0 - normalized_values
392
+
393
+ normalized_depths[all_valid_mask] = normalized_values
394
+
395
+ return normalized_depths, scale.item(), offset.item()
396
+
397
+ def compute_depth_loss_normalized(mono_depths, zbuf_depths, mask):
398
+ """
399
+ Compute normalized depth loss.
400
+ """
401
+ device = zbuf_depths.device
402
+ # Normalize both depth types
403
+ zbuf_norm, z_scale, z_offset = normalize_depth_from_reference(zbuf_depths)
404
+ mono_norm, m_scale, m_offset = normalize_depth_from_reference(mono_depths, invert=True)
405
+
406
+ valid_zbuf = (zbuf_norm >= 0) & (zbuf_norm <= 1)
407
+ valid_mono = (mono_norm >= 0) & (mono_norm <= 1)
408
+ if mask.dtype != torch.bool:
409
+ mask = mask > 0.5
410
+ combined_mask = mask & valid_zbuf & valid_mono
411
+
412
+ num_valid = combined_mask.sum().item()
413
+ if num_valid == 0:
414
+ print("No valid pixels for depth loss computation")
415
+ return torch.tensor(0.0, device=device, requires_grad=True)
416
+
417
+ depth_diff = (zbuf_norm - mono_norm) * combined_mask.float()
418
+ loss = (depth_diff**2).sum() / num_valid
419
+
420
+ return loss
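
A hedged numerical check for geodesic_loss above (the import path is an assumption, and importing loss_utils also pulls in its Video_Depth_Anything dependency): the distance between the identity and a 90-degree rotation about z should come out near pi/2.

import math
import torch
from utils.loss_utils import geodesic_loss   # assumes animation/ is the working directory

q_id  = torch.tensor([[1.0, 0.0, 0.0, 0.0]])                                     # identity, (w, x, y, z)
q_z90 = torch.tensor([[math.cos(math.pi / 4), 0.0, 0.0, math.sin(math.pi / 4)]])  # 90 deg about z
print(geodesic_loss(q_id, q_z90))   # tensor([1.5708...]) ~ pi/2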
third_party/Puppeteer/animation/utils/misc.py ADDED
@@ -0,0 +1,34 @@
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from torch.optim.lr_scheduler import LambdaLR
16
+
17
+ def warmup_then_decay(optimizer, total_steps, warmup_steps, max_lr=1e-3, min_lr=1e-5, base_lr=1e-5):
18
+ """
19
+ Create a learning rate scheduler with warmup followed by decay.
20
+ """
21
+ def lr_lambda(current_step):
22
+ if current_step < warmup_steps:
23
+ # warmup: min_lr -> max_lr
24
+ progress = float(current_step) / float(max(1, warmup_steps))
25
+ # LR(t) = min_lr + (max_lr - min_lr)*progress
26
+ return (min_lr + (max_lr - min_lr)*progress) / base_lr
27
+ else:
28
+ # decay: warmup_steps -> total_steps
29
+ progress = float(current_step - warmup_steps) / float(max(1, total_steps - warmup_steps))
30
+ # LR(t) = max_lr + (min_lr - max_lr)*progress
31
+ return (max_lr + (min_lr - max_lr)*progress) / base_lr
32
+
33
+ scheduler = LambdaLR(optimizer, lr_lambda)
34
+ return scheduler
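
A hedged usage sketch for the scheduler above (the import path and step counts are illustrative assumptions): the optimizer's initial lr should equal base_lr, because lr_lambda returns a multiplier relative to base_lr.

import torch
from utils.misc import warmup_then_decay   # assumes animation/ is the working directory

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.Adam([param], lr=1e-5)   # initial lr matches base_lr
sched = warmup_then_decay(opt, total_steps=100, warmup_steps=10,
                          max_lr=1e-3, min_lr=1e-5, base_lr=1e-5)

for step in range(100):
    opt.step()
    sched.step()
    if step in (0, 9, 49, 99):
        print(step, opt.param_groups[0]["lr"])   # ramps to ~1e-3, then decays back toward 1e-5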
third_party/Puppeteer/animation/utils/quat_utils.py ADDED
@@ -0,0 +1,179 @@
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import torch
16
+ from typing import List, Tuple, Optional
17
+
18
+ EPS = 1e-8
19
+
20
+ def normalize_quaternion(quat: torch.Tensor, eps: float = EPS) -> torch.Tensor:
21
+ """
22
+ Normalize quaternions to unit length.
23
+
24
+ Args:
25
+ quat: Quaternion tensor of shape (..., 4) with (w, x, y, z) format
26
+ eps: Small value for numerical stability
27
+
28
+ Returns:
29
+ Normalized quaternions of same shape
30
+ """
31
+ norm = torch.norm(quat, dim=-1, keepdim=True)
32
+ return quat / torch.clamp(norm, min=eps)
33
+
34
+ def quat_multiply(q1: torch.Tensor, q2: torch.Tensor) -> torch.Tensor:
35
+ """
36
+ Multiply two quaternions using Hamilton product.
37
+ """
38
+ w1, x1, y1, z1 = torch.unbind(q1, dim=-1)
39
+ w2, x2, y2, z2 = torch.unbind(q2, dim=-1)
40
+
41
+ w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
42
+ x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
43
+ y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
44
+ z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
45
+
46
+ return torch.stack((w, x, y, z), dim=-1)
47
+
48
+ def quat_conjugate(quat: torch.Tensor) -> torch.Tensor:
49
+ """
50
+ Compute quaternion conjugate.
51
+ """
52
+ w, xyz = quat[..., :1], quat[..., 1:]
53
+ return torch.cat([w, -xyz], dim=-1)
54
+
55
+ def quat_inverse(quat: torch.Tensor, eps: float = EPS) -> torch.Tensor:
56
+ """
57
+ Compute quaternion inverse.
58
+ """
59
+ conjugate = quat_conjugate(quat)
60
+ norm_squared = torch.sum(quat * quat, dim=-1, keepdim=True)
61
+ return conjugate / torch.clamp(norm_squared, min=eps)
62
+
63
+ def quat_log(quat: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
64
+ """
65
+ Compute quaternion logarithm, mapping to rotation vectors (axis-angle).
66
+ """
67
+ # quat_norm = normalize_quaternion(quat, eps)
68
+ q_norm = torch.sqrt(torch.sum(quat * quat, dim=-1, keepdim=True))
69
+ quat_norm = quat / torch.clamp(q_norm, min=eps)
70
+
71
+ w = quat_norm[..., 0:1] # Scalar part
72
+ xyz = quat_norm[..., 1:] # Vector part
73
+
74
+ xyz_norm = torch.norm(xyz, dim=-1, keepdim=True)
75
+ w_clamped = torch.clamp(w, min=-1.0 + eps, max=1.0 - eps)
76
+
77
+ # half-angle
78
+ half_angle = torch.acos(torch.abs(w_clamped))
79
+
80
+ safe_xyz_norm = torch.clamp(xyz_norm, min=eps)
81
+
82
+ # Scale factor
83
+ scale = torch.where(
84
+ xyz_norm < eps,
85
+ torch.ones_like(xyz_norm),
86
+ half_angle / safe_xyz_norm
87
+ )
88
+
89
+ # Handle quaternion sign ambiguity (q and -q represent same rotation)
90
+ sign = torch.where(w >= 0, torch.ones_like(w), -torch.ones_like(w))
91
+
92
+ rotation_vector = sign * scale * xyz
93
+
94
+ return rotation_vector
95
+
96
+ def quat_rotate_vector(quat: torch.Tensor, vec: torch.Tensor) -> torch.Tensor:
97
+ """
98
+ Rotate a 3D vector by a quaternion.
99
+ """
100
+ q_vec = quat[..., 1:] # vector part
101
+ q_w = quat[..., 0:1] # scalar part
102
+
103
+ cross1 = torch.cross(q_vec, vec, dim=-1)
104
+ cross2 = torch.cross(q_vec, cross1, dim=-1)
105
+
106
+ # Apply the rotation formula
107
+ rotated_vec = vec + 2.0 * q_w * cross1 + 2.0 * cross2
108
+
109
+ return rotated_vec
110
+
111
+ def quat_to_rotation_matrix(quat: torch.Tensor, eps: float = EPS) -> torch.Tensor:
112
+ """
113
+ Convert quaternions to rotation matrices.
114
+ """
115
+ quat_norm = normalize_quaternion(quat, eps)
116
+ w, x, y, z = torch.unbind(quat_norm, dim=-1)
117
+
118
+ xx, yy, zz = x * x, y * y, z * z
119
+ xy, xz, yz = x * y, x * z, y * z
120
+ wx, wy, wz = w * x, w * y, w * z
121
+
122
+ r00 = 1.0 - 2.0 * (yy + zz)
123
+ r01 = 2.0 * (xy - wz)
124
+ r02 = 2.0 * (xz + wy)
125
+
126
+ r10 = 2.0 * (xy + wz)
127
+ r11 = 1.0 - 2.0 * (xx + zz)
128
+ r12 = 2.0 * (yz - wx)
129
+
130
+ r20 = 2.0 * (xz - wy)
131
+ r21 = 2.0 * (yz + wx)
132
+ r22 = 1.0 - 2.0 * (xx + yy)
133
+
134
+ rotation_matrix = torch.stack([
135
+ r00, r01, r02,
136
+ r10, r11, r12,
137
+ r20, r21, r22
138
+ ], dim=-1)
139
+
140
+ return rotation_matrix.reshape(quat.shape[:-1] + (3, 3))
141
+
142
+ def quat_to_transform_matrix(quat: torch.Tensor, pos: torch.Tensor) -> torch.Tensor:
143
+ """
144
+ Convert quaternion and position to 4x4 transformation matrix.
145
+ """
146
+ # rotation part
147
+ rotation = quat_to_rotation_matrix(quat)
148
+ batch_shape = rotation.shape[:-2]
149
+
150
+ # homogeneous transformation matrix
151
+ transform = torch.zeros(batch_shape + (4, 4), dtype=rotation.dtype, device=rotation.device)
152
+ transform[..., :3, :3] = rotation
153
+ transform[..., :3, 3] = pos
154
+ transform[..., 3, 3] = 1.0
155
+
156
+ return transform
157
+
158
+ def compute_rest_local_positions(
159
+ joint_positions: torch.Tensor,
160
+ parent_indices: List[int]
161
+ ) -> torch.Tensor:
162
+ """
163
+ Compute local positions relative to parent joints from global joint positions.
164
+ """
165
+
166
+ num_joints = joint_positions.shape[0]
167
+ local_positions = torch.zeros_like(joint_positions)
168
+
169
+ for j in range(num_joints):
170
+ parent_idx = parent_indices[j]
171
+
172
+ if parent_idx >= 0 and parent_idx != j and parent_idx < num_joints:
173
+ # Child joint: local offset = global_pos - parent_global_pos
174
+ local_positions[j] = joint_positions[j] - joint_positions[parent_idx]
175
+ else:
176
+ # Root joint: use global position as local position
177
+ local_positions[j] = joint_positions[j]
178
+
179
+ return local_positions
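
A hedged consistency check for the quaternion helpers above (import path assumed): rotating the x-axis by 90 degrees about z with quat_rotate_vector and with quat_to_rotation_matrix should both yield the y-axis.

import math
import torch
from utils.quat_utils import quat_rotate_vector, quat_to_rotation_matrix

q = torch.tensor([math.cos(math.pi / 4), 0.0, 0.0, math.sin(math.pi / 4)])  # 90 deg about z, (w, x, y, z)
v = torch.tensor([1.0, 0.0, 0.0])

print(quat_rotate_vector(q, v))         # ~[0, 1, 0]
print(quat_to_rotation_matrix(q) @ v)   # ~[0, 1, 0]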
third_party/Puppeteer/animation/utils/render_first_frame.py ADDED
@@ -0,0 +1,93 @@
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import json
17
+ import argparse
18
+ from pathlib import Path
19
+
20
+ import torch
21
+ from pytorch3d.io import load_objs_as_meshes
22
+ from pytorch3d.renderer import TexturesVertex
23
+ from pytorch3d.structures import Meshes
24
+ from PIL import Image
25
+
26
+ from renderer import MeshRenderer3D
27
+ from utils.save_utils import render_single_mesh
28
+
29
+
30
+ def render_mesh_all_cameras(mesh_path, cameras_dir, output_dir="renders", image_size=512, device="cuda:0"):
31
+ """
32
+ Render mesh from all camera viewpoints in the cameras directory.
33
+
34
+ Args:
35
+ mesh_path: Path to OBJ mesh file
36
+ cameras_dir: Directory containing camera JSON config files
37
+ output_dir: Output directory for rendered images
38
+ image_size: Output image size
39
+ device: Device to use
40
+ """
41
+ cameras_dir = Path(cameras_dir)
42
+ output_dir = Path(output_dir)
43
+
44
+ # Find all JSON camera config files
45
+ json_files = list(cameras_dir.glob("*.json"))
46
+ if not json_files:
47
+ print(f"No JSON camera files found in {cameras_dir}")
48
+ return
49
+
50
+ print(f"Found {len(json_files)} camera configurations")
51
+
52
+ # Render from each camera viewpoint
53
+ for json_file in json_files:
54
+ # Load camera config
55
+ with open(json_file, 'r') as f:
56
+ cam_params = json.load(f)
57
+
58
+ # Setup renderer for this camera
59
+ renderer = MeshRenderer3D(device=device, image_size=image_size, cam_params=cam_params)
60
+
61
+ camera_name = json_file.stem
62
+ output_path = output_dir / f"render_{camera_name}.png"
63
+
64
+ render_single_mesh(renderer, mesh_path, str(output_path))
65
+
66
+ print(f"All renders saved to: {output_dir}")
67
+
68
+ def main():
69
+ parser = argparse.ArgumentParser(description="Render a mesh to an image")
70
+ parser.add_argument('--input_path', type=str, help="base input path")
71
+ parser.add_argument('--seq_name', type=str, help="sequence name")
72
+ parser.add_argument("--cameras_dir", default="utils/cameras", help="Camera config JSON file")
73
+ parser.add_argument("-s", "--size", type=int, default=512, help="Image size")
74
+ parser.add_argument("-d", "--device", default="cuda:0", help="Device to use")
75
+
76
+ args = parser.parse_args()
77
+
78
+ mesh_path = f'{args.input_path}/{args.seq_name}/objs/mesh.obj'
79
+ if not os.path.exists(mesh_path):
80
+ print(f"Error: Mesh file not found: {mesh_path}")
81
+ output_dir = f'{args.input_path}/{args.seq_name}/first_frames/'
82
+ os.makedirs(output_dir, exist_ok=True)
83
+
84
+ render_mesh_all_cameras(
85
+ mesh_path=mesh_path,
86
+ cameras_dir=args.cameras_dir,
87
+ output_dir=output_dir,
88
+ image_size=args.size,
89
+ device=args.device
90
+ )
91
+
92
+ if __name__ == "__main__":
93
+ main()
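
A hedged programmatic sketch (the mesh path is a placeholder following the examples/<seq_name>/objs layout used by demo_animation.sh; the import assumes animation/ is the working directory, which render_first_frame.py itself requires for its own imports): render one mesh from every camera JSON without going through the CLI wrapper above.

from utils.render_first_frame import render_mesh_all_cameras

render_mesh_all_cameras(
    mesh_path="../examples/spiderman/objs/mesh.obj",   # placeholder path
    cameras_dir="utils/cameras",
    output_dir="../examples/spiderman/first_frames",
    image_size=512,
    device="cuda:0",
)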
third_party/Puppeteer/animation/utils/save_flow.py ADDED
@@ -0,0 +1,297 @@
1
+ # Copyright (c) 2021 Henrique Morimitsu
2
+ # Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
3
+ # SPDX-License-Identifier: Apache License 2.0
4
+ #
5
+ # This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
6
+ #
7
+ # Original file was released under Apache License 2.0, with the full license text
8
+ # available at https://github.com/hmorimitsu/ptlflow/blob/main/LICENSE.
9
+ #
10
+ # This modified file is released under the same license.
11
+
12
+ #!/usr/bin/env python3
13
+ # -*- coding: utf-8 -*-
14
+ """
15
+ This module processes PNG frame sequences to generate optical flow using PTLFlow,
16
+ with support for visualization and video generation.
17
+ """
18
+
19
+ import argparse
20
+ import os
21
+ import subprocess
22
+ import shutil
23
+ import logging
24
+ from pathlib import Path
25
+ from typing import List, Tuple, Optional, Union
26
+
27
+ import cv2 as cv
28
+ import torch
29
+ import numpy as np
30
+ from tqdm import tqdm
31
+
32
+ from third_partys.ptlflow.ptlflow.utils import flow_utils
33
+ from third_partys.ptlflow.ptlflow.utils.io_adapter import IOAdapter
34
+ import third_partys.ptlflow.ptlflow as ptlflow
35
+
36
+ class OpticalFlowProcessor:
37
+ """Handles optical flow computation and visualization."""
38
+
39
+ def __init__(
40
+ self,
41
+ model_name: str = 'dpflow',
42
+ checkpoint: str = 'sintel',
43
+ device: Optional[str] = None,
44
+ resize_to: Optional[Tuple[int, int]] = None
45
+ ):
46
+ """
47
+ Initialize optical flow processor.
48
+
49
+ Args:
50
+ model_name: Name of the flow model to use
51
+ checkpoint: Checkpoint/dataset name for the model
52
+ device: Device to run on (auto-detect if None)
53
+ resize_to: Optional (width, height) to resize frames
54
+ """
55
+ self.model_name = model_name
56
+ self.checkpoint = checkpoint
57
+ self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
58
+ self.resize_to = resize_to
59
+
60
+ # Initialize model
61
+ self.model = ptlflow.get_model(model_name, ckpt_path=checkpoint).to(self.device).eval()
62
+ print(f"Loaded {model_name} model on {self.device}")
63
+
64
+ self.io_adapter = None
65
+
66
+ def load_frame_sequence(self, frames_dir: Union[str, Path]) -> Tuple[List[np.ndarray], List[Path]]:
67
+ """
68
+ Load PNG frame sequence from directory.
69
+ """
70
+ frames_dir = Path(frames_dir)
71
+
72
+ if not frames_dir.exists():
73
+ raise FileNotFoundError(f"Frames directory not found: {frames_dir}")
74
+
75
+ # Find PNG files and sort naturally
76
+ png_files = list(frames_dir.glob('*.png'))
77
+ if len(png_files) < 2:
78
+ raise ValueError(f"Need at least 2 PNG frames, found {len(png_files)} in {frames_dir}")
79
+
80
+ # Natural sorting for proper frame order
81
+ png_files.sort(key=lambda x: self._natural_sort_key(x.name))
82
+
83
+ frames = []
84
+ for png_path in tqdm(png_files, desc="Loading frames"):
85
+ # Load image in color
86
+ img_bgr = cv.imread(str(png_path), cv.IMREAD_COLOR)
87
+
88
+ if self.resize_to:
89
+ img_bgr = cv.resize(img_bgr, self.resize_to, interpolation=cv.INTER_LINEAR)
90
+
91
+ img_rgb = cv.cvtColor(img_bgr, cv.COLOR_BGR2RGB)
92
+ frames.append(img_rgb)
93
+
94
+ return frames, png_files
95
+
96
+ def _natural_sort_key(self, filename: str) -> List[Union[int, str]]:
97
+ """Natural sorting key for filenames with numbers."""
98
+ import re
99
+ return [int(text) if text.isdigit() else text.lower()
100
+ for text in re.split('([0-9]+)', filename)]
101
+
102
+ def compute_optical_flow_sequence(
103
+ self,
104
+ frames: List[np.ndarray],
105
+ flow_vis_dir: Union[str, Path],
106
+ flow_save_dir: Optional[Union[str, Path]] = None,
107
+ save_visualizations: bool = True
108
+ ) -> List[torch.Tensor]:
109
+ """
110
+ Compute optical flow for entire frame sequence.
111
+ """
112
+ if len(frames) < 2:
113
+ raise ValueError("Need at least 2 frames for optical flow")
114
+
115
+ flow_vis_dir = Path(flow_vis_dir)
116
+ flow_save_dir = Path(flow_save_dir) if flow_save_dir else flow_vis_dir
117
+
118
+ H, W = frames[0].shape[:2]
119
+
120
+ # Initialize IO adapter
121
+ if self.io_adapter is None:
122
+ self.io_adapter = IOAdapter(self.model, (H, W))
123
+
124
+ flows = []
125
+ for i in tqdm(range(len(frames) - 1), desc="Computing optical flow"):
126
+ # Prepare frame pair
127
+ frame_pair = [frames[i], frames[i + 1]]
128
+ raw_inputs = self.io_adapter.prepare_inputs(frame_pair)
129
+
130
+ imgs = raw_inputs['images'][0] # (2, 3, H, W)
131
+
132
+ pair_tensor = torch.stack((imgs[0:1], imgs[1:2]), dim=1).squeeze(0) # (2, 3, H, W)
133
+ pair_tensor = pair_tensor.to(self.device, non_blocking=True).contiguous()
134
+
135
+ with torch.no_grad():
136
+ flow_result = self.model({'images': pair_tensor.unsqueeze(0)})
137
+ flow = flow_result['flows'][0] # (1, 2, H, W)
138
+
139
+ flows.append(flow)
140
+
141
+ if save_visualizations:
142
+ self._save_flow_outputs(flow, i, flow_vis_dir, flow_save_dir)
143
+
144
+ return flows
145
+
146
+ def _save_flow_outputs(
147
+ self,
148
+ flow_tensor: torch.Tensor,
149
+ frame_idx: int,
150
+ viz_dir: Path,
151
+ flow_dir: Path
152
+ ) -> None:
153
+ """Save flow outputs in both .flo and visualization formats."""
154
+ # Save raw flow (.flo format)
155
+ flow_hw2 = flow_tensor[0] # (2, H, W)
156
+ flow_np = flow_hw2.permute(1, 2, 0).cpu().numpy() # (H, W, 2)
157
+
158
+ flow_path = flow_dir / f'flow_{frame_idx:04d}.flo'
159
+ flow_utils.flow_write(flow_path, flow_np)
160
+
161
+ # Save visualization
162
+ flow_rgb = flow_utils.flow_to_rgb(flow_tensor)[0] # Remove batch dimension
163
+
164
+ if flow_rgb.dim() == 4: # (Npred, 3, H, W)
165
+ flow_rgb = flow_rgb[0]
166
+
167
+ flow_rgb_np = (flow_rgb * 255).byte().permute(1, 2, 0).cpu().numpy() # (H, W, 3)
168
+ viz_bgr = cv.cvtColor(flow_rgb_np, cv.COLOR_RGB2BGR)
169
+
170
+ viz_path = viz_dir / f'flow_viz_{frame_idx:04d}.png'
171
+ cv.imwrite(str(viz_path), viz_bgr)
172
+
173
+ def create_flow_video(
174
+ image_dir: Union[str, Path],
175
+ output_filename: str = 'flow.mp4',
176
+ fps: int = 10,
177
+ pattern: str = 'flow_viz_*.png',
178
+ cleanup_temp: bool = True
179
+ ) -> bool:
180
+ """
181
+ Create MP4 video from flow visualization images.
182
+ """
183
+ image_dir = Path(image_dir)
184
+
185
+ if not image_dir.exists():
186
+ print(f"Image directory not found: {image_dir}")
187
+
188
+ image_files = sorted(image_dir.glob(pattern))
189
+ if not image_files:
190
+ print(f"No images found matching pattern '{pattern}' in {image_dir}")
191
+
192
+ temp_dir = image_dir / 'temp_sequence'
193
+ temp_dir.mkdir(exist_ok=True)
194
+
195
+ try:
196
+ # Copy files with sequential naming
197
+ for i, img_file in enumerate(image_files):
198
+ temp_name = temp_dir / f'frame_{i:05d}.png'
199
+ shutil.copy2(img_file, temp_name)
200
+
201
+ # Create video using ffmpeg
202
+ output_path = image_dir / output_filename
203
+
204
+ cmd = [
205
+ 'ffmpeg', '-y',
206
+ '-framerate', str(fps),
207
+ '-i', str(temp_dir / 'frame_%05d.png'),
208
+ '-c:v', 'libx264',
209
+ '-pix_fmt', 'yuv420p',
210
+ str(output_path)
211
+ ]
212
+
213
+ subprocess.run(
214
+ cmd,
215
+ capture_output=True,
216
+ text=True,
217
+ check=True
218
+ )
219
+ return True
220
+ except Exception as e:
221
+ print(f"Video creation failed: {e}")
222
+ return False
223
+ finally:
224
+ if cleanup_temp and temp_dir.exists():
225
+ shutil.rmtree(temp_dir)
226
+
227
+ def main(
228
+ frames_dir: Union[str, Path],
229
+ flow_vis_dir: Union[str, Path] = 'flow_out',
230
+ flow_save_dir: Optional[Union[str, Path]] = None,
231
+ resize_to: Optional[Tuple[int, int]] = None,
232
+ model_name: str = 'dpflow',
233
+ checkpoint: str = 'sintel'
234
+ ) -> bool:
235
+
236
+ # Initialize processor
237
+ processor = OpticalFlowProcessor(
238
+ model_name=model_name,
239
+ checkpoint=checkpoint,
240
+ resize_to=resize_to
241
+ )
242
+
243
+ # Load frames
244
+ frames, png_paths = processor.load_frame_sequence(frames_dir)
245
+
246
+ # Compute optical flow
247
+ flows = processor.compute_optical_flow_sequence(
248
+ frames=frames,
249
+ flow_vis_dir=flow_vis_dir,
250
+ flow_save_dir=flow_save_dir,
251
+ save_visualizations=True
252
+ )
253
+
254
+ # Create video
255
+ return create_flow_video(flow_vis_dir)
256
+
257
+ def get_parser():
258
+ parser = argparse.ArgumentParser(description="Optical flow inference on frame sequences")
259
+
260
+ parser.add_argument('--input_path', type=str, help="base input path")
261
+ parser.add_argument('--seq_name', type=str, help="sequence name")
262
+ parser.add_argument('--model_name', type=str, default='dpflow', help="Optical flow model to use")
263
+ parser.add_argument('--checkpoint', type=str, default='sintel', help="Model checkpoint/dataset name")
264
+ parser.add_argument('--resize_width', type=int, default=None, help="Resize frame width (must specify both width and height)")
265
+ parser.add_argument('--resize_height', type=int, default=None, help="Resize frame height (must specify both width and height)")
266
+ parser.add_argument('--fps', type=int, default=10, help="Frame rate for output video")
267
+
268
+ return parser
269
+
270
+ if __name__ == '__main__':
271
+ parser = get_parser()
272
+ args = parser.parse_args()
273
+
274
+ # Path
275
+ frames_dir = f'{args.input_path}/{args.seq_name}/imgs'
276
+ flow_vis_dir = frames_dir.replace("imgs", "flow_vis")
277
+ flow_save_dir = frames_dir.replace("imgs", "flow")
278
+
279
+ os.makedirs(flow_vis_dir, exist_ok=True)
280
+ os.makedirs(flow_save_dir, exist_ok=True)
281
+
282
+ # Prepare resize parameter
283
+ resize_to = None
284
+ if args.resize_width and args.resize_height:
285
+ resize_to = (args.resize_width, args.resize_height)
286
+
287
+ # Process optical flow
288
+ success = main(
289
+ frames_dir=frames_dir,
290
+ flow_vis_dir=flow_vis_dir,
291
+ flow_save_dir=flow_save_dir,
292
+ resize_to=resize_to,
293
+ model_name=args.model_name,
294
+ checkpoint=args.checkpoint
295
+ )
296
+
297
+ print("Optical flow processing completed successfully")
third_party/Puppeteer/animation/utils/save_utils.py ADDED
@@ -0,0 +1,374 @@
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from pytorch3d.io import load_obj
16
+ from pytorch3d.renderer import TexturesAtlas
17
+ from pytorch3d.structures import Meshes
18
+
19
+ import os
20
+ import torch
21
+ import json
22
+ import numpy as np
23
+ from tqdm import tqdm
24
+ from pathlib import Path
25
+ import subprocess
26
+ from PIL import Image
27
+ from scipy.ndimage import gaussian_filter1d
28
+ from third_partys.co_tracker.save_track import save_track
29
+
30
+ def render_single_mesh(renderer, mesh_path, out_path="render_result.png", atlas_size=8):
31
+ """
32
+ Test render a single mesh and save the result.
33
+ """
34
+ device = renderer.device
35
+
36
+ verts, faces, aux = load_obj(
37
+ mesh_path,
38
+ device=device,
39
+ load_textures=True,
40
+ create_texture_atlas=True,
41
+ texture_atlas_size=atlas_size,
42
+ texture_wrap="repeat"
43
+ )
44
+ atlas = aux.texture_atlas # (F, atlas_size, atlas_size, 3)
45
+
46
+ vmin, vmax = verts.min(0).values, verts.max(0).values
47
+ center = (vmax + vmin) / 2.
48
+ scale = (vmax - vmin).max()
49
+ verts = (verts - center) / scale
50
+
51
+ mesh_norm = Meshes(
52
+ verts=[verts],
53
+ faces=[faces.verts_idx],
54
+ textures=TexturesAtlas(atlas=[atlas])
55
+ )
56
+ with torch.no_grad():
57
+ rendered = renderer.render(mesh_norm) # shape=[1, H, W, 4]
58
+
59
+ rendered_img = renderer.tensor_to_image(rendered)
60
+
61
+ pil_img = Image.fromarray(rendered_img)
62
+ pil_img.save(out_path)
63
+ print(f"Saved render to {out_path}")
64
+
65
+ def apply_gaussian_smoothing(data, sigma = 1.0, preserve_first_frame = True, eps = 1e-8):
66
+ """
67
+ Apply Gaussian smoothing along the time axis with quaternion normalization.
68
+ """
69
+ smoothed = gaussian_filter1d(data, sigma=sigma, axis=0)
70
+
71
+ # Preserve first frame if requested
72
+ if preserve_first_frame and data.shape[0] > 0:
73
+ smoothed[0] = data[0]
74
+
75
+ if data.shape[-1] == 4:
76
+ norms = np.linalg.norm(smoothed, axis=-1, keepdims=True)
77
+ smoothed = smoothed / np.maximum(norms, eps)
78
+
79
+ return smoothed
80
+
81
+ def render_single_view_sequence(quats, root_quats, root_pos, renderer, model, output_dir, view_name, fps = 25):
82
+ """
83
+ Render animation sequence from a single viewpoint.
84
+ """
85
+ output_dir = Path(output_dir)
86
+ output_dir.mkdir(parents=True, exist_ok=True)
87
+
88
+ T = quats.shape[0]
89
+
90
+ model.animate(quats, root_quats, root_pos)
91
+
92
+ for i in tqdm(range(T), desc=f"Rendering {view_name}"):
93
+ mesh = model.get_mesh(i)
94
+ rendered = renderer.render(mesh)
95
+
96
+ img_array = renderer.tensor_to_image(rendered)
97
+ img = Image.fromarray(img_array)
98
+
99
+ frame_path = output_dir / f"{view_name}_frame_{i:04d}.png"
100
+ img.save(frame_path)
101
+
102
+ # Create video
103
+ video_path = output_dir / f"{view_name}_output_video.mp4"
104
+ cmd = f"ffmpeg -y -framerate {fps} -i {output_dir}/{view_name}_frame_%04d.png -c:v libx264 -pix_fmt yuv420p {video_path}"
105
+ subprocess.call(cmd, shell=True)
106
+
107
+ def save_and_smooth_results(args, model, renderer, final_quats, root_quats, root_pos, out_dir, additional_renderers = None, load_pt = False, sigma = 1.0, fps = 25):
108
+ """
109
+ Save and smooth animation results with multi-view rendering.
110
+ """
111
+ device = final_quats.device
112
+ T = final_quats.shape[0]
113
+ # Save Raw Results
114
+ if not load_pt:
115
+ raw_dir = os.path.join(out_dir, "raw")
116
+ os.makedirs(raw_dir, exist_ok=True)
117
+
118
+ torch.save(final_quats, os.path.join(raw_dir, "local_quats.pt"))
119
+ torch.save(root_quats, os.path.join(raw_dir, "root_quats.pt"))
120
+ torch.save(root_pos, os.path.join(raw_dir, "root_pos.pt"))
121
+ if hasattr(model, 'rest_local_positions'):
122
+ torch.save(model.rest_local_positions, os.path.join(raw_dir, "rest_local_positions.pt"))
123
+
124
+ print(f"Saved raw motion to {raw_dir}")
125
+
126
+ quats_np = final_quats.cpu().numpy()
127
+ root_quats_np = root_quats.cpu().numpy()
128
+ root_pos_np = root_pos.cpu().numpy()
129
+
130
+ # Apply Gaussian smoothing if enabled
131
+ if args.gauss_filter:
132
+ print(f"Applying Gaussian smoothing (sigma={sigma})")
133
+
134
+ smooth_quats_np = apply_gaussian_smoothing(
135
+ quats_np, sigma=sigma, preserve_first_frame=True
136
+ )
137
+ smooth_root_quats_np = apply_gaussian_smoothing(
138
+ root_quats_np, sigma=sigma, preserve_first_frame=True
139
+ )
140
+ smooth_root_pos_np = apply_gaussian_smoothing(
141
+ root_pos_np, sigma=sigma, preserve_first_frame=True
142
+ )
143
+ smooth_dir = os.path.join(out_dir, "smoothed")
144
+ os.makedirs(smooth_dir, exist_ok=True)
145
+ save_dir = smooth_dir
146
+
147
+ else:
148
+ smooth_quats_np = quats_np
149
+ smooth_root_quats_np = root_quats_np
150
+ smooth_root_pos_np = root_pos_np
151
+ save_dir = raw_dir
152
+
153
+ smooth_quats = torch.tensor(smooth_quats_np, dtype=torch.float32, device=device)
154
+ smooth_root_quats = torch.tensor(smooth_root_quats_np, dtype=torch.float32, device=device)
155
+ smooth_root_pos = torch.tensor(smooth_root_pos_np, dtype=torch.float32, device=device)
156
+
157
+ # Render Sequences
158
+ if not load_pt and args.gauss_filter:
159
+ smooth_dir_path = Path(smooth_dir)
160
+ torch.save(smooth_quats, smooth_dir_path / "local_quats.pt")
161
+ torch.save(smooth_root_quats, smooth_dir_path / "root_quats.pt")
162
+ torch.save(smooth_root_pos, smooth_dir_path / "root_pos.pt")
163
+ print(f"Saved smoothed motion to {smooth_dir}")
164
+
165
+ # Render main view
166
+ print(f"Rendering {args.main_renderer} view ({T} frames)")
167
+ render_single_view_sequence(
168
+ smooth_quats, smooth_root_quats, smooth_root_pos,
169
+ renderer, model, save_dir, args.main_renderer, fps
170
+ )
171
+
172
+ # Render additional views if provided
173
+ if additional_renderers:
174
+ for renderer_key, view_renderer in additional_renderers.items():
175
+ view_name = renderer_key.replace("_renderer", "")
176
+ render_single_view_sequence(
177
+ smooth_quats, smooth_root_quats, smooth_root_pos,
178
+ view_renderer, model, save_dir, view_name, fps
179
+ )
180
+
181
+ def save_args(args, output_dir, filename="config.json"):
182
+ args_dict = vars(args)
183
+ os.makedirs(output_dir, exist_ok=True)
184
+
185
+ config_path = os.path.join(output_dir, filename)
186
+ with open(config_path, 'w') as f:
187
+ json.dump(args_dict, f, indent=4)
188
+
189
+ def visualize_joints_on_mesh(model, renderer, seq_name, out_dir):
190
+ """
191
+ Render mesh with joint visualizations and return visibility mask.
192
+ """
193
+ joints_2d = renderer.project_points(model.joints_rest)
194
+
195
+ mesh = model.get_mesh()
196
+ image_with_joints, vis_mask = renderer.render_with_points(mesh, model.joints_rest)
197
+ image_np = image_with_joints[0].cpu().numpy()
198
+ if image_np.shape[2] == 4:
199
+ image_rgb = image_np[..., :3]
200
+ else:
201
+ image_rgb = image_np
202
+ if image_rgb.max() <= 1.0:
203
+ image_rgb = (image_rgb * 255).astype(np.uint8)
204
+ img = Image.fromarray(image_rgb)
205
+ output_path = f"{out_dir}/mesh_with_joints_{seq_name}_visible.png"
206
+ img.save(output_path)
207
+ return vis_mask
208
+
209
+ def visualize_points_on_mesh(model, renderer, seq_name, out_dir):
210
+ """
211
+ Render mesh with point visualizations and return visibility mask.
212
+ """
213
+ points_2d = renderer.project_points(model.vertices[0])
214
+
215
+ mesh = model.get_mesh()
216
+ image_with_points, vis_mask = renderer.render_with_points(mesh, model.vertices[0], for_vertices=True)
217
+ image_np = image_with_points[0].cpu().numpy()
218
+ if image_np.shape[2] == 4:
219
+ image_rgb = image_np[..., :3]
220
+ else:
221
+ image_rgb = image_np
222
+ if image_rgb.max() <= 1.0:
223
+ image_rgb = (image_rgb * 255).astype(np.uint8)
224
+ img = Image.fromarray(image_rgb)
225
+ output_path = f"{out_dir}/mesh_with_verts_{seq_name}_visible.png"
226
+ img.save(output_path)
227
+ return vis_mask
228
+
229
+ def save_track_points(point_vis_mask, renderer, model, img_path, out_dir, args):
230
+ """
231
+ Save and track selected points on the mesh with intelligent sampling.
232
+ """
233
+
234
+ vertex_project_2d = renderer.project_points(model.vertices[0])
235
+ visible_indices = torch.where(point_vis_mask)[0]
236
+
237
+ track_2d_point_path = img_path.replace('imgs', 'track_2d_verts')
238
+ os.makedirs(track_2d_point_path, exist_ok=True)
239
+
240
+ num_visible = len(visible_indices)
241
+ MAX_VISIBLE_POINTS = 15000
242
+ MAX_SAMPLE_POINTS = 4000
243
+
244
+ # Determine tracking strategy
245
+ tracking_mode = "full" if num_visible <= MAX_VISIBLE_POINTS else "sampled"
246
+
247
+ if not os.listdir(track_2d_point_path):
248
+ # Generate new tracking data
249
+ if tracking_mode == "full":
250
+ print(f"Saving tracks for all visible vertices (count: {num_visible})")
251
+
252
+ # Track all visible points
253
+ visible_vertex_project_2d = vertex_project_2d[visible_indices]
254
+ track_2d_point = save_track(
255
+ args.seq_name, visible_vertex_project_2d, img_path,
256
+ track_2d_point_path, out_dir, for_point=True
257
+ )
258
+
259
+ np.save(f'{track_2d_point_path}/visible_indices.npy',
260
+ visible_indices.cpu().numpy())
261
+
262
+ # Sample subset for final use
263
+ num_sample = min(MAX_SAMPLE_POINTS, num_visible)
264
+ sampled_local_indices = torch.randperm(num_visible)[:num_sample]
265
+ sampled_vertex_indices = visible_indices[sampled_local_indices]
266
+ np.save(f'{track_2d_point_path}/sampled_indices.npy',
267
+ sampled_vertex_indices.cpu().numpy())
268
+
269
+ else:
270
+ print(f"Too many visible vertices ({num_visible} > {MAX_VISIBLE_POINTS}), "
271
+ f"tracking only {MAX_SAMPLE_POINTS} sampled vertices")
272
+
273
+ # Sample points directly from visible set
274
+ num_sample = min(MAX_SAMPLE_POINTS, num_visible)
275
+ sampled_local_indices = torch.randperm(num_visible)[:num_sample]
276
+ sampled_vertex_indices = visible_indices[sampled_local_indices]
277
+
278
+ # Track only sampled points
279
+ sampled_vertex_project_2d = vertex_project_2d[sampled_vertex_indices]
280
+ track_2d_point = save_track(
281
+ args.seq_name, sampled_vertex_project_2d, img_path,
282
+ track_2d_point_path, out_dir, for_point=True
283
+ )
284
+
285
+ np.save(f'{track_2d_point_path}/visible_indices.npy',
286
+ visible_indices.cpu().numpy())
287
+ np.save(f'{track_2d_point_path}/sampled_indices.npy',
288
+ sampled_vertex_indices.cpu().numpy())
289
+
290
+ else:
291
+ # Load existing tracking data
292
+ print("Loading existing vertex tracks")
293
+ track_2d_point = np.load(f'{track_2d_point_path}/pred_tracks.npy')
294
+
295
+ visible_indices = np.load(f'{track_2d_point_path}/visible_indices.npy')
296
+ visible_indices = torch.from_numpy(visible_indices).long().to(args.device)
297
+
298
+ sampled_vertex_indices = np.load(f'{track_2d_point_path}/sampled_indices.npy')
299
+ sampled_vertex_indices = torch.from_numpy(sampled_vertex_indices).long().to(args.device)
300
+
301
+ track_2d_point = torch.from_numpy(track_2d_point).float().to(args.device)
302
+
303
+ # Create index mapping for tracking data
304
+ if tracking_mode == "full":
305
+ # Map from original vertex indices to positions in tracking data
306
+ vertex_to_track_idx = {idx.item(): i for i, idx in enumerate(visible_indices)}
307
+
308
+ track_indices = torch.tensor(
309
+ [vertex_to_track_idx[idx.item()] for idx in sampled_vertex_indices],
310
+ device=args.device, dtype=torch.long
311
+ )
312
+ else:
313
+ # Direct mapping for sampled-only tracking
314
+ track_indices = torch.arange(len(sampled_vertex_indices),
315
+ device=args.device, dtype=torch.long)
316
+
317
+ return track_2d_point, track_indices, sampled_vertex_indices
318
+
319
+ def save_final_video(args):
320
+
321
+ additional_views = [view.strip() for view in args.additional_renderers.split(',') if view.strip()]
322
+ if len(additional_views) > 3:
323
+ additional_views = additional_views[:3]
324
+ additional_views = [view for view in additional_views if view != args.main_renderer]
325
+
326
+ save_dir = 'raw' if not args.gauss_filter else 'smoothed'
327
+ import subprocess
328
+ cmd = (
329
+ f'ffmpeg '
330
+ f'-i {args.input_path}/{args.seq_name}/input.mp4 '
331
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{args.main_renderer}_output_video.mp4 '
332
+ '-filter_complex "'
333
+ '[0:v][1:v]hstack=inputs=2[stacked]; '
334
+ '[stacked]drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'gt\':x=(w/4-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
335
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'ours\':x=(3*w/4-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10" '
336
+ f'-c:a copy {args.save_path}/{args.seq_name}/{args.save_name}/concat_output.mp4'
337
+ )
338
+
339
+ subprocess.call(cmd, shell=True)
340
+ cmd = (
341
+ f'ffmpeg '
342
+ f'-i {args.input_path}/{args.seq_name}/input.mp4 '
343
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{args.main_renderer}_output_video.mp4 '
344
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{additional_views[0]}_output_video.mp4 '
345
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{additional_views[1]}_output_video.mp4 '
346
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{additional_views[2]}_output_video.mp4 '
347
+ '-filter_complex "'
348
+ '[0:v][1:v][2:v][3:v][4:v]hstack=inputs=5[stacked]; '
349
+ '[stacked]drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'gt\':x=(w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
350
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'{args.main_renderer}\':x=(3*w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
351
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'{additional_views[0]}\':x=(5*w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
352
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'{additional_views[1]}\':x=(7*w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
353
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'{additional_views[2]}\':x=(9*w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10" '
354
+ f'-c:a copy {args.save_path}/{args.seq_name}/{args.save_name}/concat_output_4view.mp4'
355
+ )
356
+ subprocess.call(cmd, shell=True)
357
+
358
+ def load_motion_data(motion_dir, device="cuda:0"):
359
+ """
360
+ Load saved motion data.
361
+ """
362
+ local_quats = torch.load(os.path.join(motion_dir, "local_quats.pt"), map_location=device)
363
+ root_quats = torch.load(os.path.join(motion_dir, "root_quats.pt"), map_location=device)
364
+ root_pos = torch.load(os.path.join(motion_dir, "root_pos.pt"), map_location=device)
365
+
366
+ # Load rest positions if available (for reference)
367
+ rest_pos_path = os.path.join(motion_dir, "rest_local_positions.pt")
368
+ if os.path.exists(rest_pos_path):
369
+ rest_positions = torch.load(rest_pos_path, map_location=device)
370
+ else:
371
+ rest_positions = None
372
+ print("Warning: rest_local_positions.pt not found, model should have them initialized")
373
+
374
+ return local_quats, root_quats, root_pos, rest_positions
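
A hedged reload sketch (the directory is a placeholder following the save_path/seq_name/save_name/smoothed layout used by demo_animation.sh; the import assumes animation/ is the working directory): read back the motion tensors written by save_and_smooth_results above.

from utils.save_utils import load_motion_data

local_quats, root_quats, root_pos, rest = load_motion_data(
    "../results/animation/spiderman/spiderman_demo/smoothed",   # placeholder path
    device="cuda:0",
)
# per-frame local joint quaternions, root quaternions and root positions
print(local_quats.shape, root_quats.shape, root_pos.shape)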
third_party/Puppeteer/checkpoints/rig.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0019dfc4b32d63c1392aa264aed2253c1e0c2fb09216f8e2cc269bbfb8bb49b5
3
+ size 9
third_party/Puppeteer/demo_animation.sh ADDED
@@ -0,0 +1,63 @@
1
+ #!/bin/bash
2
+
3
+ echo "Running animation..."
4
+
5
+ # copy rig and mesh for animation
6
+ for txt_file in results/final_rigging/*.txt; do
7
+ if [ -f "$txt_file" ]; then
8
+ seq_name=$(basename "$txt_file" .txt)
9
+
10
+ mkdir -p "examples/$seq_name/objs/"
11
+
12
+ cp "$txt_file" "examples/$seq_name/objs/rig.txt"
13
+ echo "Copied $txt_file -> examples/$seq_name/objs/rig.txt"
14
+
15
+ obj_file="examples/$seq_name.obj"
16
+ if [ -f "$obj_file" ]; then
17
+ cp "$obj_file" "examples/$seq_name/objs/mesh.obj"
18
+ echo "Copied $obj_file -> examples/$seq_name/objs/mesh.obj"
19
+ else
20
+ echo "Warning: $obj_file not found"
21
+ fi
22
+
23
+ # extract frames
24
+ video_file="examples/$seq_name/input.mp4"
25
+ if [ -f "$video_file" ]; then
26
+ echo "Found video file: $video_file"
27
+ cd "examples/$seq_name"
28
+ mkdir -p imgs
29
+ ffmpeg -i input.mp4 -vf fps=10 imgs/frame_%04d.png -y
30
+ echo "Extracted frames from $video_file to imgs/"
31
+ cd ../../
32
+ else
33
+ echo "No video file found: $video_file"
34
+ fi
35
+ fi
36
+ done
37
+
38
+ cd animation
39
+
40
+ # save flow
41
+ echo "Processing sequences with save_flow.py..."
42
+ for seq_dir in ../examples/*/; do
43
+ if [ -d "$seq_dir" ]; then
44
+ seq_name=$(basename "$seq_dir")
45
+ echo "Processing sequence: $seq_name"
46
+ python utils/save_flow.py --input_path ../examples --seq_name "$seq_name"
47
+ fi
48
+ done
49
+
50
+ # animation
51
+ echo "Running optimization for each sequence..."
52
+ mkdir -p ../results/animation
53
+
54
+ python optimization.py --save_path ../results/animation --iter 200 --input_path ../examples --img_size 960 \
55
+ --seq_name 'spiderman' --save_name 'spiderman_demo'
56
+
57
+ python optimization.py --save_path ../results/animation --iter 200 --input_path ../examples --img_size 960 \
58
+ --seq_name 'deer' --save_name 'deer_demo' --smooth_weight 1 --main_renderer front_left --additional_renderer "right,front_right,back_right"
59
+
60
+ echo "Animation completed."
61
+
62
+ cd ..
63
+ echo "Puppeteer pipeline completed successfully!"
third_party/Puppeteer/demo_rigging.sh ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # Robust Puppeteer rigging pipeline
3
+ # - stop on errors
4
+ # - safe path checks
5
+ # - always copy artifacts into /data/results
6
+
7
+ set -euo pipefail
8
+
9
+ echo "[INFO] Starting Puppeteer rigging pipeline..."
10
+
11
+ # ---------- env / paths ----------
12
+ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
13
+ # assume the repo layout is /app/Puppeteer/{skeleton,skinning,...}
14
+ ROOT_DIR="$(dirname "$SCRIPT_DIR")" # /app/Puppeteer
15
+ TMP_DIR="/tmp/puppeteer_run"
16
+ WORK_DIR="${TMP_DIR}" # 통합 작업 디렉토리
17
+ IN_EXAMPLES="${ROOT_DIR}/examples"
18
+ OUT_ROOT="${WORK_DIR}/results"
19
+ OUT_SKEL="${OUT_ROOT}/skel_results"
20
+ OUT_SKEL_FOR_SKIN="${OUT_ROOT}/skeletons"
21
+ OUT_SKIN="${OUT_ROOT}/skin_results"
22
+ OUT_FINAL="${OUT_ROOT}/final_rigging"
23
+ RESULT_DIR="${RESULT_DIR:-/data/results}" # app.py와 동일 환경변수 사용
24
+
25
+ mkdir -p "$WORK_DIR" "$OUT_ROOT" "$OUT_SKEL_FOR_SKIN" "$OUT_FINAL" "$RESULT_DIR"
26
+
27
+ # extend the Python path (compatible with both third_party and third_partys)
28
+ export PYTHONPATH="/app:/app/Puppeteer:/app/Puppeteer/third_party:${PYTHONPATH:-}"
29
+ [ -d /app/third_partys ] || ln -s /app/Puppeteer/third_party /app/third_partys 2>/dev/null || true
30
+ [ -f /app/Puppeteer/third_party/__init__.py ] || touch /app/Puppeteer/third_party/__init__.py
31
+
32
+ # ---------- skeleton ----------
33
+ echo "[INFO] Running skeleton generation..."
34
+ cd "${ROOT_DIR}/skeleton"
35
+
36
+ python demo.py \
37
+ --input_dir "${IN_EXAMPLES}" \
38
+ --pretrained_weights skeleton_ckpts/puppeteer_skeleton_w_diverse_pose.pth \
39
+ --output_dir "${OUT_ROOT}" \
40
+ --save_name skel_results \
41
+ --input_pc_num 8192 \
42
+ --save_render \
43
+ --apply_marching_cubes \
44
+ --joint_token \
45
+ --seq_shuffle
46
+
47
+ echo "[INFO] Skeleton generation completed."
48
+
49
+ # copy the skeleton results into the skinning input folder (after verifying they exist)
50
+ echo "[INFO] Preparing skeletons for skinning..."
51
+ if [ -d "${OUT_SKEL}" ]; then
52
+ mkdir -p "${OUT_SKEL_FOR_SKIN}"
53
+ shopt -s nullglob
54
+ for f in "${OUT_SKEL}"/*_pred.txt; do
55
+ cp -f "$f" "${OUT_SKEL_FOR_SKIN}/$(basename "${f/_pred.txt/.txt}")"
56
+ done
57
+ shopt -u nullglob
58
+ else
59
+ echo "[ERR] ${OUT_SKEL} not found (skeleton step failed?)"
60
+ exit 1
61
+ fi
62
+ echo "[INFO] Copied rig files to ${OUT_SKEL_FOR_SKIN}"
63
+
64
+ # ---------- skinning ----------
65
+ echo "[INFO] Running skinning..."
66
+ cd "${ROOT_DIR}/skinning"
67
+
68
+ # on a Space, CUDA_VISIBLE_DEVICES is usually assigned a single GPU (0)
69
+ CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}" \
70
+ torchrun --nproc_per_node=1 --master_port=10009 \
71
+ main.py \
72
+ --num_workers 1 \
73
+ --batch_size 1 \
74
+ --generate \
75
+ --save_skin_npy \
76
+ --pretrained_weights skinning_ckpts/puppeteer_skin_w_diverse_pose_depth1.pth \
77
+ --input_skel_folder "${OUT_SKEL_FOR_SKIN}" \
78
+ --mesh_folder "${IN_EXAMPLES}" \
79
+ --post_filter \
80
+ --depth 1 \
81
+ --save_folder "${OUT_SKIN}"
82
+
83
+ echo "[INFO] Skinning completed."
84
+
85
+ # ---------- collect artifacts ----------
86
+ echo "[INFO] Collecting final artifacts..."
87
+ mkdir -p "${OUT_FINAL}"
88
+
89
+ # preferred artifact: output/rigged.glb (copy it first if it exists)
90
+ if [ -f "${WORK_DIR}/output/rigged.glb" ]; then
91
+ cp -f "${WORK_DIR}/output/rigged.glb" "${OUT_FINAL}/rigged.glb"
92
+ fi
93
+
94
+ # also collect skinning results (.glb) if present
95
+ if [ -d "${OUT_SKIN}/generate" ]; then
96
+ shopt -s nullglob
97
+ cp -f "${OUT_SKIN}/generate/"*.glb "${OUT_FINAL}/" 2>/dev/null || true
98
+ shopt -u nullglob
99
+ fi
100
+
101
+ # ---------- export to /data/results ----------
102
+ echo "[INFO] Exporting to ${RESULT_DIR} ..."
103
+ mkdir -p "${RESULT_DIR}"
104
+ shopt -s nullglob
105
+ cp -f "${OUT_FINAL}/"*.glb "${RESULT_DIR}/" 2>/dev/null || true
106
+ cp -f "${OUT_FINAL}/"*.gltf "${RESULT_DIR}/" 2>/dev/null || true
107
+ shopt -u nullglob
108
+
109
+ # verify results: at least one artifact must exist for the run to count as a success
110
+ if compgen -G "${RESULT_DIR}/*.glb" > /dev/null || compgen -G "${RESULT_DIR}/*.gltf" > /dev/null ; then
111
+ echo "[OK] Artifacts saved to ${RESULT_DIR}"
112
+ else
113
+ echo "[ERR] No .glb/.gltf produced. Check skeleton/skinning logs."
114
+ exit 2
115
+ fi
116
+
117
+ echo "[INFO] Pipeline finished successfully."
third_party/Puppeteer/requirements.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trimesh==4.2.3
2
+ accelerate==0.28.0
3
+ mesh2sdf==1.1.0
4
+ transformers==4.46.1
5
+ numpy==1.26.4
6
+ pyrender==0.1.45
7
+ tqdm
8
+ opencv-python==4.9.0.80
9
+ omegaconf==2.3.0
10
+ einops==0.7.0
11
+ timm
12
+ lightning==2.2
13
+ boto3
14
+ cython==0.29.36
15
+ tetgen==0.5.2
16
+ loguru
17
+ pytz
18
+ h5py
19
+ plyfile
20
+ pymeshlab
21
+ yacs
22
+ fvcore
23
+ easydict
24
+ libigl==2.5.1
25
+ scikit-learn
26
+ jsonargparse
27
+ ptlflow
28
+ imageio-ffmpeg==0.4.7
29
+ xformers==0.0.23
third_party/Puppeteer/skeleton/README.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Auto-regressive Skeleton Generation
2
+ This folder provides the skeleton generation implementation and scripts to evaluate the paper’s metrics on three test sets. You can also run inference on your own 3D objects.
3
+
4
+ ## Weights Download
5
+ First download [checkpoints of Michelangelo](https://huggingface.co/Maikou/Michelangelo/tree/main/checkpoints/aligned_shape_latents) and our [released weights](https://huggingface.co/Seed3D/Puppeteer) for skeleton generation:
6
+
7
+ ```
8
+ python download.py
9
+ ```
10
+
11
+ ## Evaluation
12
+
13
+ To reproduce our evaluations, run the following command on `Articulation-XL2.0-test`, `ModelsResource-test` and `Diverse-pose-test`. `Articulation-XL2.0-test` and `Diverse-pose-test` are available [here](https://huggingface.co/datasets/chaoyue7/Articulation-XL2.0). For your convenience, we also save `ModelsResource-test` in our format (download it [here](https://drive.google.com/file/d/12U2ZuZWcKCQRI3IheBbG6I9-jfpG4KF5/view?usp=sharing)). The inference process requires 4.6 GB of VRAM and takes 1–2 seconds per inference.
14
+
15
+ ```
16
+ bash eval.sh
17
+ ```
18
+ You can change `save_name` for different evaluations and check the quantitative results afterwards in `evaluate_results.txt`. The pipeline saves the mesh and skeleton as `.obj` files; pass `--save_render` to additionally generate rendered previews of the mesh and skeleton.
19
+
20
+ These are the numbers (the metrics are Chamfer-distance-based and reported in units of 10<sup>-2</sup>; a brief definition is given below the table) that you should be able to reproduce using the released weights and the current version of the codebase.
21
+ <table>
22
+ <thead>
23
+ <tr>
24
+ <th rowspan="2">Test set</th>
25
+ <th colspan="3">Articulation-XL2.0-test</th>
26
+ <th colspan="3">ModelsResource-test</th>
27
+ <th colspan="3">Diverse-pose-test</th>
28
+ </tr>
29
+ <tr>
30
+ <th>CD-J2J</th>
31
+ <th>CD-J2B</th>
32
+ <th>CD-B2B</th>
33
+ <th>CD-J2J</th>
34
+ <th>CD-J2B</th>
35
+ <th>CD-B2B</th>
36
+ <th>CD-J2J</th>
37
+ <th>CD-J2B</th>
38
+ <th>CD-B2B</th>
39
+ </tr>
40
+ </thead>
41
+ <tbody>
42
+ <tr>
43
+ <td>train on Arti-XL2.0 w/o diverse-pose subset</td>
44
+ <td>3.062</td>
45
+ <td>2.342</td>
46
+ <td>1.963</td>
47
+ <td>3.843</td>
48
+ <td>2.876</td>
49
+ <td>2.465</td>
50
+ <td>3.276</td>
51
+ <td>2.597</td>
52
+ <td>2.074</td>
53
+ </tr>
54
+ <tr>
55
+ <td>train on Arti-XL2.0 w/ diverse-pose subset</td>
56
+ <td><b>3.047</b></td>
57
+ <td><b>2.337</b></td>
58
+ <td><b>1.952</b></td>
59
+ <td><b>3.785</b></td>
60
+ <td><b>2.847</b></td>
61
+ <td><b>2.430</b></td>
62
+ <td><b>2.483</b></td>
63
+ <td><b>1.922</b></td>
64
+ <td><b>1.600</b></td>
65
+ </tr>
66
+ </tbody>
67
+ </table>
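+
+ For reference, CD-J2J is commonly computed (up to the exact normalization used in RigNet-style evaluations) as a symmetric Chamfer distance between the predicted joint set $P$ and the ground-truth joint set $G$:
+
+ $$\mathrm{CD\text{-}J2J}(P,G)=\frac{1}{2|P|}\sum_{p\in P}\min_{g\in G}\lVert p-g\rVert_2+\frac{1}{2|G|}\sum_{g\in G}\min_{p\in P}\lVert g-p\rVert_2$$
+
+ CD-J2B and CD-B2B apply the same idea with distances measured from joints to bone segments and between bone segments, respectively.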
68
+
69
+ Note: If your results differ from the numbers reported in the table above (e.g., 3.78 → ~3.90 for CD-J2J on ModelsResource), check the version of `transformers`, which may cause the following warnings:
70
+ ```
71
+ Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in SkeletonOPTModel is torch.float32. You should run training or inference using Automatic Mixed-Precision via the with torch.autocast(device_type='torch_device'): decorator, or load the model with the torch_dtype argument. Example: model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)
72
+ Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in SkeletonOPTDecoder is torch.float32. You should run training or inference using Automatic Mixed-Precision via the with torch.autocast(device_type='torch_device'): decorator, or load the model with the torch_dtype argument. Example: model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)
73
+ ```
74
+ These results were obtained using `CUDA 11.8`. We observed that switching to CUDA 12.1 or other versions, while keeping all package versions identical, resulted in slight numerical variations.
75
+
76
+ ## Demo
77
+ We provide some examples (download [here](https://drive.google.com/file/d/1bjtA3JSqW-t0YoSd2vOZy3iKvuOMLIrm/view?usp=sharing)) to test our models by running the following command. You can also test our models on your own 3D objects; remember to change `input_dir`.
78
+ ```
79
+ bash demo.sh
80
+ ```
81
+
82
+ Input mesh quality directly affects model performance, since the pre-trained shape encoder was trained on high-quality meshes. You can test reconstruction using the shape latents extracted from the shape encoder to check your data. The example below shows results from an input mesh with a coarse surface.
83
+
84
+ <p align="center">
85
+ <img width="80%" src="../assets/reconstruction.png"/>
86
+ </p>
87
+
88
+ ## Visualization
89
+ We use MeshLab for skeleton visualization in the paper. The skeleton can be saved using `save_skeleton_obj` in `utils/save_utils.py`. Bones are represented as blue cones oriented from the parent joint to the child joint, joints as red spheres, and the root joint as a green sphere. Example results are shown below.
90
+
91
+ <p align="center">
92
+ <img width="80%" src="../assets/skeleton_results.png"/>
93
+ </p>
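+
+ A minimal sketch of this cone/sphere convention using `trimesh` (illustrative only; `save_skeleton_obj` in `utils/save_utils.py` is the actual implementation, and the radii below are arbitrary):
+ ```python
+ import numpy as np
+ import trimesh
+
+ def skeleton_preview(joints, bones, root_idx):
+     """joints: (N, 3) array; bones: list of (parent_idx, child_idx) pairs."""
+     parts = []
+     for i, joint in enumerate(joints):
+         sphere = trimesh.creation.icosphere(radius=0.01, subdivisions=2)
+         sphere.apply_translation(joint)
+         # root joint green, other joints red
+         sphere.visual.vertex_colors = [0, 255, 0, 255] if i == root_idx else [255, 0, 0, 255]
+         parts.append(sphere)
+     for parent, child in bones:
+         start, end = joints[parent], joints[child]
+         direction = end - start
+         length = np.linalg.norm(direction)
+         # cone base at the parent joint, apex pointing toward the child joint (blue)
+         cone = trimesh.creation.cone(radius=0.005, height=length)
+         cone.apply_transform(trimesh.geometry.align_vectors([0, 0, 1], direction / length))
+         cone.apply_translation(start)
+         cone.visual.vertex_colors = [0, 0, 255, 255]
+         parts.append(cone)
+     return trimesh.util.concatenate(parts)
+ ```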
third_party/Puppeteer/skeleton/data_utils/README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Preprocessed data
2
+ We provide the preprocessed data saved in NPZ files, which contain the following information:
3
+ ```
4
+ 'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'pc_w_norm', 'joint_names', 'skinning_weights_value', 'skinning_weights_rows', 'skinning_weights_cols', 'skinning_weights_shape'
5
+ ```
6
+ You can check `read_npz.py` for how to read the NPZ files and `save_npz.py` for how we save them.
7
+
8
+ Before saving them into NPZ files, we extract the mesh (.obj) and rig (.txt) from 3D models downloaded from Objaverse-XL using Blender. The rig file follows the format in [RigNet](https://github.com/zhan-xu/RigNet), which includes the following entries:
9
+ ```
10
+ joints [joint_name] [x] [y] [z]
11
+ root [root_joint_name]
12
+ skin [vertex_index] [joints_name1] [skinning_weight1] [joints_name2] [skinning_weight2] ...
13
+ hier [parent_joint_name] [child_joint_name]
14
+ ```
15
+ For an example, please see `examples/0a59c5ffa4a1476bac6d540b79947f31.txt`.
16
+
17
+ If you want to convert an NPZ file back to OBJ and TXT files, we provide an example; run:
18
+ ```
19
+ python convert_npz_to_mesh_rig.py
20
+ ```
21
+
22
+ ## Visualization
23
+ We provide a method for visualizing 3D models with skeletons using [Pyrender](https://github.com/mmatl/pyrender), modified from [Lab4D](https://github.com/lab4d-org/lab4d/tree/ppr/). This visualization also serves as input to the VLM for skeleton quality rating. Make sure you have installed the following packages before running visualization:
24
+ ```
25
+ pip install trimesh opencv-python pyrender
26
+ ```
27
+
28
+ We provide an example to demonstrate the process. For this example, we prepare an OBJ file along with a TXT file containing rigging information. Then, run:
29
+ ```
30
+ python render_data.py
31
+ ```
32
+ You will obtain the following outputs:
33
+
34
+ <p align="center">
35
+ <img width="80%" src="examples/0a59c5ffa4a1476bac6d540b79947f31_render_results.png"/>
36
+ </p>
37
+
38
+ ### Reading rig and mesh from GLBs
39
+ We provide the script we use for reading the rig (.txt) and mesh (.obj) from GLB files. You can run:
40
+ ```
41
+ python read_rig_mesh_from_glb.py
42
+ ```
43
+ Remember to download Blender (we use 4.2.0) and also install bpy in your conda environment.
third_party/Puppeteer/skeleton/data_utils/convert_npz_to_mesh_rig.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ You can convert an NPZ file back to OBJ (mesh) and TXT (rig) files using this Python script.
16
+ """
17
+ import os
18
+ import numpy as np
19
+ import scipy.sparse as sp
20
+
21
+ def export_obj(vertices, faces, normals, output_path):
22
+ with open(output_path, 'w') as f:
23
+ for v in vertices:
24
+ f.write(f"v {v[0]} {v[1]} {v[2]}\n")
25
+ for n in normals:
26
+ f.write(f"vn {n[0]} {n[1]} {n[2]}\n")
27
+ for i, face in enumerate(faces):
28
+ # OBJ format is 1-based, so we add 1 to all indices
29
+ f.write(f"f {face[0]+1}//{face[0]+1} {face[1]+1}//{face[1]+1} {face[2]+1}//{face[2]+1}\n")
30
+
31
+ def export_rig_txt(joints, bones, root_index, joint_names, skinning_weights, output_path):
32
+ """
33
+ joints [joint_name] [x] [y] [z]
34
+ root [root_joint_name]
35
+ skin [vertex_index] [joint_name1] [weight1] [joint_name2] [weight2] ...
36
+ hier [parent_joint_name] [child_joint_name]
37
+ """
38
+ n_joints = len(joints)
39
+ n_verts = skinning_weights.shape[0] # (n_vertex, n_joints)
40
+
41
+ with open(output_path, 'w') as f:
42
+ # 1) joints
43
+ for i in range(n_joints):
44
+ x, y, z = joints[i]
45
+ jn = joint_names[i]
46
+ f.write(f"joints {jn} {x} {y} {z}\n")
47
+
48
+ # 2) root
49
+ root_name = joint_names[root_index]
50
+ f.write(f"root {root_name}\n")
51
+
52
+ # 3) skin
53
+ for vidx in range(n_verts):
54
+ row_weights = skinning_weights[vidx]
55
+ non_zero_indices = np.where(row_weights != 0)[0]
56
+ if len(non_zero_indices) == 0:
57
+ continue
58
+
59
+ line_parts = [f"skin {vidx}"] # vertex_idx
60
+ for jidx in non_zero_indices:
61
+ w = row_weights[jidx]
62
+ jn = joint_names[jidx]
63
+ line_parts.append(jn)
64
+ line_parts.append(str(w))
65
+
66
+ f.write(" ".join(line_parts) + "\n")
67
+
68
+ # 4) hier
69
+ for p_idx, c_idx in bones:
70
+ p_name = joint_names[p_idx]
71
+ c_name = joint_names[c_idx]
72
+ f.write(f"hier {p_name} {c_name}\n")
73
+
74
+ if __name__ == "__main__":
75
+
76
+ data = np.load('articulation_xlv2_test.npz', allow_pickle=True)
77
+ data_list = data['arr_0']
78
+
79
+ print(f"Loaded {len(data_list)} data entries")
80
+
81
+ model_data = data_list[0]
82
+ print("Data keys:", model_data.keys())
83
+ # 'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'joint_names',
84
+ # 'skinning_weights_value', 'skinning_weights_row', 'skinning_weights_col', 'skinning_weights_shape'
85
+
86
+ vertices = model_data['vertices'] # (n_vertex, 3)
87
+ faces = model_data['faces'] # (n_faces, 3)
88
+ normals = model_data['normals'] # (n_vertex, 3)
89
+ joints = model_data['joints'] # (n_joints, 3)
90
+ bones = model_data['bones'] # (n_bones, 2)
91
+ root_index = model_data['root_index'] # int
92
+ joint_names = model_data['joint_names'] # list of str
93
+ uuid_str = model_data['uuid']
94
+
95
+ skin_val = model_data['skinning_weights_value']
96
+ skin_row = model_data['skinning_weights_row']
97
+ skin_col = model_data['skinning_weights_col']
98
+ skin_shape = model_data['skinning_weights_shape']
99
+ skin_sparse = sp.coo_matrix((skin_val, (skin_row, skin_col)), shape=skin_shape)
100
+ skinning_weights = skin_sparse.toarray() # (n_vertex, n_joints)
101
+
102
+ obj_path = f"{uuid_str}.obj"
103
+ export_obj(vertices, faces, normals, obj_path)
104
+ rig_txt_path = f"{uuid_str}.txt"
105
+ export_rig_txt(joints, bones, root_index, joint_names, skinning_weights, rig_txt_path)
106
+
107
+ print("Done!")
third_party/Puppeteer/skeleton/data_utils/data_loader.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import glob
17
+ import numpy as np
18
+ import trimesh
19
+
20
+ class DataLoader:
21
+ def __init__(self):
22
+ self.joint_name_to_idx = {}
23
+
24
+ def load_rig_data(self, rig_path):
25
+ joints = []
26
+ joints_names = []
27
+ bones = []
28
+
29
+ with open(rig_path, 'r') as f:
30
+ for line in f:
31
+ parts = line.strip().split()
32
+ if parts[0] == 'joints':
33
+ joint_name = parts[1]
34
+ joint_pos = [float(parts[2]), float(parts[3]), float(parts[4])]
35
+ self.joint_name_to_idx[joint_name] = len(joints)
36
+ joints.append(joint_pos)
37
+ joints_names.append(joint_name)
38
+ elif parts[0] == 'root':
39
+ self.root_name = parts[1]
40
+ elif parts[0] == 'hier':
41
+ parent_joint = self.joint_name_to_idx[parts[1]]
42
+ child_joint = self.joint_name_to_idx[parts[2]]
43
+ bones.append([parent_joint, child_joint])
44
+
45
+ self.joints = np.array(joints)
46
+ self.bones = np.array(bones)
47
+ self.joints_names = joints_names
48
+ self.root_idx = None
49
+ if self.root_name is not None:
50
+ self.root_idx = self.joint_name_to_idx[self.root_name]
51
+
52
+ def load_mesh(self, mesh_path):
53
+ mesh = trimesh.load(mesh_path, process=False)
54
+ mesh.visual.vertex_colors[:, 3] = 100 # set transparency
55
+ self.mesh = mesh
56
+
57
+ # Compute the centroid normal of the mesh
58
+ v = self.mesh.vertices
59
+ xmin, ymin, zmin = v.min(axis=0)
60
+ xmax, ymax, zmax = v.max(axis=0)
61
+ self.bbox_center = np.array([(xmax + xmin)/2, (ymax + ymin)/2, (zmax + zmin)/2])
62
+ self.bbox_size = np.array([xmax - xmin, ymax - ymin, zmax - zmin])
63
+ self.bbox_scale = max(xmax - xmin, ymax - ymin, zmax - zmin)
64
+
65
+ normal = mesh.center_mass - self.bbox_center
66
+ normal = normal / (np.linalg.norm(normal)+1e-5)
67
+
68
+ # Choose axis order based on normal direction
69
+ if abs(normal[1]) > abs(normal[2]): # if Y component is dominant
70
+ self.axis_order = [0, 1, 2] # swapping Y and Z
71
+ else:
72
+ self.axis_order =[0, 2, 1] # keep default order
73
+
74
+ self.mesh.vertices = self.mesh.vertices[:, self.axis_order]
75
+ self.joints = self.joints[:, self.axis_order]
76
+ self.normalize_coordinates()
77
+
78
+ def normalize_coordinates(self):
79
+
80
+ # Compute scale and offset
81
+ scale = 1.0 / (self.bbox_scale+1e-5)
82
+ offset = -self.bbox_center
83
+
84
+ self.mesh.vertices = (self.mesh.vertices + offset) * scale
85
+ self.joints = (self.joints + offset) * scale
86
+
87
+ # Calculate appropriate radii based on the mean size
88
+ self.joint_radius = 0.01
89
+ self.bone_radius = 0.005
90
+
91
+ def query_mesh_rig(self):
92
+
93
+ input_dict = {"shape": self.mesh}
94
+
95
+ # Create joints as spheres
96
+ joint_meshes = []
97
+ for i, joint in enumerate(self.joints):
98
+
99
+ sphere = trimesh.creation.icosphere(
100
+ radius=self.joint_radius, subdivisions=2
101
+ )
102
+ sphere.apply_translation(joint)
103
+ if i == self.root_idx:
104
+ # root green
105
+ sphere.visual.vertex_colors = [0, 255, 0, 255]
106
+ else:
107
+ sphere.visual.vertex_colors = [0, 0, 255, 255]
108
+
109
+ joint_meshes.append(sphere)
110
+ input_dict["joint_meshes"] = trimesh.util.concatenate(joint_meshes)
111
+
112
+ # Create bones as cylinders
113
+ bone_meshes = []
114
+ for bone in self.bones:
115
+ start, end = self.joints[bone[0]], self.joints[bone[1]]
116
+ cylinder = trimesh.creation.cylinder(radius=self.bone_radius, segment=np.array([[0, 0, 0], end - start]))
117
+ cylinder.apply_translation(start)
118
+ cylinder.visual.vertex_colors = [255, 0, 0, 255] #[0, 0, 255, 255] # blue
119
+ bone_meshes.append(cylinder)
120
+ input_dict["bone_meshes"] = trimesh.util.concatenate(bone_meshes)
121
+
122
+ return input_dict
third_party/Puppeteer/skeleton/data_utils/pyrender_wrapper.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2023 Gengshan Yang
2
+ # Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+ # This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
6
+ #
7
+ # Original file was released under MIT, with the full license text
8
+ # available at https://github.com/lab4d-org/lab4d/blob/main/LICENSE.
9
+ #
10
+ # This modified file is released under the same license.
11
+
12
+ import os
13
+ import numpy as np
14
+ import cv2
15
+ import pyrender
16
+ import trimesh
17
+ from pyrender import (
18
+ IntrinsicsCamera,
19
+ Mesh,
20
+ Node,
21
+ Scene,
22
+ OffscreenRenderer,
23
+ MetallicRoughnessMaterial,
24
+ RenderFlags
25
+ )
26
+
27
+ os.environ["PYOPENGL_PLATFORM"] = "egl"
28
+
29
+ def look_at(eye, center, up):
30
+ """Create a look-at (view) matrix."""
31
+ f = np.array(center, dtype=np.float32) - np.array(eye, dtype=np.float32)
32
+ f /= np.linalg.norm(f)
33
+
34
+ u = np.array(up, dtype=np.float32)
35
+ u /= np.linalg.norm(u)
36
+
37
+ s = np.cross(f, u)
38
+ u = np.cross(s, f)
39
+
40
+ m = np.identity(4, dtype=np.float32)
41
+ m[0, :3] = s
42
+ m[1, :3] = u
43
+ m[2, :3] = -f
44
+ m[:3, 3] = -np.matmul(m[:3, :3], np.array(eye, dtype=np.float32))
45
+
46
+ return m
47
+
48
+ class PyRenderWrapper:
49
+ def __init__(self, image_size=(1024, 1024)) -> None:
50
+ # renderer
51
+ self.image_size = image_size
52
+ render_size = max(image_size)
53
+ self.r = OffscreenRenderer(render_size, render_size)
54
+ self.intrinsics = IntrinsicsCamera(
55
+ render_size, render_size, render_size / 2, render_size / 2
56
+ )
57
+ # light
58
+ self.light_pose = np.eye(4)
59
+ self.set_light_topdown()
60
+ self.direc_l = pyrender.DirectionalLight(color=np.ones(3), intensity=5.0)
61
+ self.material = MetallicRoughnessMaterial(
62
+ roughnessFactor=0.75, metallicFactor=0.75, alphaMode="BLEND"
63
+ )
64
+ self.init_camera()
65
+
66
+ def init_camera(self):
67
+ self.flip_pose = np.eye(4)
68
+ self.set_camera(np.eye(4))
69
+
70
+ def set_camera(self, scene_to_cam):
71
+ # object to camera transforms
72
+ self.scene_to_cam = self.flip_pose @ scene_to_cam
73
+
74
+ def set_light_topdown(self, gl=False):
75
+ # top down light, slightly closer to the camera
76
+ if gl:
77
+ rot = cv2.Rodrigues(np.asarray([-np.pi / 2, 0, 0]))[0]
78
+ else:
79
+ rot = cv2.Rodrigues(np.asarray([np.pi / 2, 0, 0]))[0]
80
+ self.light_pose[:3, :3] = rot
81
+
82
+ def align_light_to_camera(self):
83
+ self.light_pose = np.linalg.inv(self.scene_to_cam)
84
+
85
+ def set_intrinsics(self, intrinsics):
86
+ """
87
+ Args:
88
+ intrinsics: (4,) fx,fy,px,py
89
+ """
90
+ self.intrinsics = IntrinsicsCamera(
91
+ intrinsics[0], intrinsics[1], intrinsics[2], intrinsics[3]
92
+ )
93
+
94
+ def get_cam_to_scene(self):
95
+ cam_to_scene = np.eye(4)
96
+ cam_to_scene[:3, :3] = self.scene_to_cam[:3, :3].T
97
+ cam_to_scene[:3, 3] = -self.scene_to_cam[:3, :3].T @ self.scene_to_cam[:3, 3]
98
+ return cam_to_scene
99
+
100
+ def set_camera_view(self, angle, bbox_center, distance=2.0):
101
+ # Calculate camera position based on angle and distance from bounding box center
102
+ camera_position = bbox_center + distance * np.array([np.sin(angle), 0, np.cos(angle)], dtype=np.float32)
103
+ look_at_matrix = look_at(camera_position, bbox_center, [0, 1, 0])
104
+ self.scene_to_cam = look_at_matrix @ self.flip_pose
105
+
106
+ def render(self, input_dict):
107
+ # Create separate scenes for transparent objects (mesh) and solid objects (joints and bones)
108
+ scene_transparent = Scene(ambient_light=np.array([1.0, 1.0, 1.0, 1.0]) * 0.1)
109
+ scene_solid = Scene(ambient_light=np.array([1.0, 1.0, 1.0, 1.0]) * 0.1)
110
+
111
+ mesh_pyrender = Mesh.from_trimesh(input_dict["shape"], smooth=False)
112
+ mesh_pyrender.primitives[0].material = self.material
113
+ scene_transparent.add(mesh_pyrender, pose=np.eye(4), name="shape")
114
+
115
+ if "joint_meshes" in input_dict:
116
+ joints_pyrender = Mesh.from_trimesh(input_dict["joint_meshes"], smooth=False)
117
+ joints_pyrender.primitives[0].material = self.material
118
+ scene_solid.add(joints_pyrender, pose=np.eye(4), name="joints")
119
+
120
+ if "bone_meshes" in input_dict:
121
+ bones_pyrender = Mesh.from_trimesh(input_dict["bone_meshes"], smooth=False)
122
+ bones_pyrender.primitives[0].material = self.material
123
+ scene_solid.add(bones_pyrender, pose=np.eye(4), name="bones")
124
+
125
+ # Camera for both scenes
126
+ scene_transparent.add(self.intrinsics, pose=self.get_cam_to_scene())
127
+ scene_solid.add(self.intrinsics, pose=self.get_cam_to_scene())
128
+
129
+ # Light for both scenes
130
+ scene_transparent.add(self.direc_l, pose=self.light_pose)
131
+ scene_solid.add(self.direc_l, pose=self.light_pose)
132
+
133
+ # Render transparent scene first
134
+ color_transparent, depth_transparent = self.r.render(scene_transparent)
135
+
136
+ # Render solid scene on top
137
+ color_solid, depth_solid = self.r.render(scene_solid)
138
+
139
+ # Combine the two scenes
140
+ color_combined = np.where(depth_solid[..., np.newaxis] == 0, color_transparent, color_solid)
141
+
142
+ return color_combined, depth_solid
143
+ def delete(self):
144
+ self.r.delete()
third_party/Puppeteer/skeleton/data_utils/read_npz.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import numpy as np
15
+ import scipy.sparse as sp
16
+
17
+ # Load the NPZ file
18
+ data = np.load('articulation_xlv2_test.npz', allow_pickle=True)
19
+ data_list = data['arr_0']
20
+
21
+ print(f"Loaded {len(data_list)} data entries")
22
+ print(f"Data keys: {data_list[0].keys()}")
23
+ # 'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'pc_w_norm', 'joint_names', 'skinning_weights_value',
24
+ # 'skinning_weights_row', 'skinning_weights_col', 'skinning_weights_shape'
25
+
26
+ data = data_list[0] # check the first data
27
+
28
+ vertices = data['vertices'] # (n_vertex, 3)
29
+ faces = data['faces'] # (n_faces, 3)
30
+ normals = data['normals'] # (n_vertex, 3)
31
+ joints = data['joints'] # (n_joints, 3)
32
+ bones = data['bones'] # (n_bones, 2)
33
+ pc_w_norm = data['pc_w_norm'] # (8192, 6)
34
+
35
+ # Extract the sparse skinning weights components
36
+ skinning_data = data['skinning_weights_value']
37
+ skinning_rows = data['skinning_weights_row']
38
+ skinning_cols = data['skinning_weights_col']
39
+ skinning_shape = data['skinning_weights_shape']
40
+
41
+ skinning_sparse = sp.coo_matrix((skinning_data, (skinning_rows, skinning_cols)), shape=skinning_shape)
42
+ skinning_weights = skinning_sparse.toarray() # (n_vertex, n_joints)
43
+
third_party/Puppeteer/skeleton/data_utils/read_rig_mesh_from_glb.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Blender script for extracting rig (.txt) and mesh (.obj) from glbs.
17
+ This code currently supports GLB files only, but it can be easily modified to load other formats (e.g., FBX, DAE) with minimal changes.
18
+ """
19
+
20
+ import bpy
21
+ import os
22
+ import re
23
+ import json
24
+ import pickle
25
+
26
+ def get_hierarchy_root_joint(joint):
27
+ """
28
+ Function to find the top parent joint node from the given
29
+ 'joint' Blender node (armature bone).
30
+ """
31
+ root_joint = joint
32
+ while root_joint.parent is not None:
33
+ root_joint = root_joint.parent
34
+ return root_joint
35
+
36
+ def get_meshes_and_armatures():
37
+ """
38
+ Function to get all meshes and armatures in the scene
39
+ """
40
+ default_objects = ['Cube', 'Light', 'Camera', 'Icosphere']
41
+ for obj_name in default_objects:
42
+ if obj_name in bpy.data.objects:
43
+ bpy.data.objects.remove(bpy.data.objects[obj_name], do_unlink=True)
44
+
45
+ meshes = [obj for obj in bpy.context.scene.objects if obj.type == 'MESH']
46
+ armatures = [obj for obj in bpy.context.scene.objects if obj.type == 'ARMATURE']
47
+ return meshes, armatures
48
+
49
+ def get_joint_dict(root):
50
+ """
51
+ Function to create a dictionary of joints from the root joint
52
+ """
53
+ joint_pos = {}
54
+ def traverse_bone(bone):
55
+ joint_pos[bone.name] = {
56
+ 'pos': bone.head_local,
57
+ 'pa': bone.parent.name if bone.parent else 'None',
58
+ 'ch': [child.name for child in bone.children]
59
+ }
60
+ for child in bone.children:
61
+ traverse_bone(child)
62
+
63
+ traverse_bone(root)
64
+ return joint_pos
65
+
66
+ def record_info(root, joint_dict, meshes, mesh_vert_offsets, file_info):
67
+ """
68
+ - root: root joint
69
+ - joint_dict
70
+ - meshes
71
+ - mesh_vert_offsets: for multi-geometry
72
+ - file_info
73
+ """
74
+ skin_records = {}
75
+
76
+ def replace_special_characters(name):
77
+ return re.sub(r'\W+', '_', name)
78
+
79
+ for key, val in joint_dict.items():
80
+ modified_key = replace_special_characters(key)
81
+ file_info.write(f'joints {modified_key} {val["pos"][0]:.8f} {val["pos"][1]:.8f} {val["pos"][2]:.8f}\n')
82
+ file_info.write(f'root {replace_special_characters(root.name)}\n')
83
+
84
+ for mesh_index, mesh in enumerate(meshes):
85
+ vert_offset = mesh_vert_offsets[mesh_index]
86
+ if mesh.type == 'MESH':
87
+ for vtx in mesh.data.vertices:
88
+ weights = {}
89
+ for group in vtx.groups:
90
+ bone_name = replace_special_characters(mesh.vertex_groups[group.group].name)
91
+ weights[bone_name] = group.weight
92
+
93
+ global_vertex_index = vert_offset + vtx.index
94
+
95
+ skin_record = f"skin {global_vertex_index} " + " ".join(f"{bone} {weight:.4f}" for bone, weight in weights.items())
96
+
97
+ if global_vertex_index not in skin_records:
98
+ skin_records[global_vertex_index] = skin_record
99
+ file_info.write(skin_record + "\n")
100
+
101
+ for key, val in joint_dict.items():
102
+ if val['pa'] != 'None':
103
+ parent_name = replace_special_characters(val['pa'])
104
+ child_name = replace_special_characters(key)
105
+ file_info.write(f'hier {parent_name} {child_name}\n')
106
+
107
+
108
+ def record_obj(meshes, file_obj):
109
+ vert_offset = 0
110
+ norm_offset = 0
111
+ mesh_vert_offsets = []
112
+
113
+ for mesh in meshes:
114
+ mesh_vert_offsets.append(vert_offset)
115
+ bpy.context.view_layer.objects.active = mesh
116
+ bpy.ops.object.mode_set(mode='OBJECT')
117
+
118
+ # vertex
119
+ for v in mesh.data.vertices:
120
+ file_obj.write(f"v {v.co[0]} {v.co[1]} {v.co[2]}\n")
121
+ file_obj.write("\n")
122
+
123
+ # normal
124
+ for vn in mesh.data.vertices:
125
+ normal = vn.normal
126
+ file_obj.write(f"vn {normal[0]} {normal[1]} {normal[2]}\n")
127
+ file_obj.write("\n")
128
+
129
+ # face
130
+ for poly in mesh.data.polygons:
131
+ verts = [v + 1 + vert_offset for v in poly.vertices]
132
+ file_obj.write(f"f {verts[0]}//{verts[0]} {verts[1]}//{verts[1]} {verts[2]}//{verts[2]}\n")
133
+
134
+ vert_count = len(mesh.data.vertices)
135
+ vert_offset += vert_count
136
+ norm_offset += vert_count
137
+
138
+ return mesh_vert_offsets
139
+
140
+ def process_glb(glb_path, rigs_dir, meshes_dir):
141
+ base_name = os.path.splitext(os.path.basename(glb_path))[0]
142
+
143
+ obj_name = os.path.join(meshes_dir, f'{base_name}.obj')
144
+ info_name = os.path.join(rigs_dir, f'{base_name}.txt')
145
+
146
+ # Skip processing if rig info file already exists
147
+ if os.path.exists(info_name):
148
+ print(f"{info_name} already exists. Skipping...")
149
+ return
150
+
151
+ if os.path.exists(obj_name):
152
+ print(f"{obj_name} already exists. Skipping...")
153
+ return
154
+
155
+ bpy.ops.wm.read_factory_settings(use_empty=True)
156
+ bpy.ops.import_scene.gltf(filepath=glb_path)
157
+
158
+ meshes, armatures = get_meshes_and_armatures()
159
+
160
+ if not armatures:
161
+ print(f"No armatures found in {glb_path}. Skipping...")
162
+ return
163
+
164
+ root = armatures[0].data.bones[0]
165
+ root_name = get_hierarchy_root_joint(root)
166
+ joint_dict = get_joint_dict(root_name)
167
+
168
+ # save meshes
169
+ with open(obj_name, 'w') as file_obj:
170
+ mesh_vert_offsets = record_obj(meshes, file_obj)
171
+
172
+ # save rigs
173
+ with open(info_name, 'w') as file_info:
174
+ record_info(root_name, joint_dict, meshes, mesh_vert_offsets, file_info)
175
+
176
+ print(f"Processed {glb_path}")
177
+
178
+ if __name__ == '__main__':
179
+
180
+ src_dir = 'glbs'
181
+ rigs_dir = 'rigs'
182
+ meshes_dir = 'meshes'
183
+ # Ensure rigs directory exists
184
+ if not os.path.exists(rigs_dir):
185
+ os.makedirs(rigs_dir)
186
+ if not os.path.exists(meshes_dir):
187
+ os.makedirs(meshes_dir)
188
+
189
+ glb_paths = [os.path.join(src_dir, file) for file in os.listdir(src_dir) if file.endswith('.glb')]
190
+
191
+ print(len(glb_paths))
192
+
193
+ for glb_path in glb_paths:
194
+ try:
195
+ process_glb(glb_path, rigs_dir, meshes_dir)
196
+ except Exception as e:
197
+ with open('error.txt', 'a') as error_file:
198
+ error_file.write(f"{glb_path}: {str(e)}\n")
third_party/Puppeteer/skeleton/data_utils/render_data.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import numpy as np
16
+ import cv2
17
+
18
+ from pyrender_wrapper import PyRenderWrapper
19
+ from data_loader import DataLoader
20
+
21
+ def main():
22
+ loader = DataLoader()
23
+
24
+ raw_size = (960, 960)
25
+ renderer = PyRenderWrapper(raw_size)
26
+
27
+ output_dir = 'render_results'
28
+ os.makedirs(output_dir, exist_ok=True)
29
+
30
+ rig_path = 'examples/0a59c5ffa4a1476bac6d540b79947f31.txt'
31
+ mesh_path = rig_path.replace('.txt', '.obj')
32
+
33
+ filename = os.path.splitext(os.path.basename(rig_path))[0]
34
+
35
+ loader.load_rig_data(rig_path)
36
+ loader.load_mesh(mesh_path)
37
+ input_dict = loader.query_mesh_rig()
38
+
39
+ angles = [0, np.pi/2, np.pi, 3*np.pi/2]
40
+
41
+ bbox_center = loader.mesh.bounding_box.centroid
42
+ bbox_size = loader.mesh.bounding_box.extents
43
+ distance = np.max(bbox_size) * 2
44
+
45
+ subfolder_path = os.path.join(output_dir, filename)
46
+
47
+ os.makedirs(subfolder_path, exist_ok=True)
48
+
49
+ for i, angle in enumerate(angles):
50
+ print(f"Rendering view at {np.degrees(angle)} degrees")
51
+
52
+ renderer.set_camera_view(angle, bbox_center, distance)
53
+ renderer.align_light_to_camera()
54
+
55
+ color = renderer.render(input_dict)[0]
56
+
57
+ output_filename = f"{filename}_view{i+1}.png"
58
+ output_filepath = os.path.join(subfolder_path, output_filename)
59
+ cv2.imwrite(output_filepath, color)
60
+ if __name__ == "__main__":
61
+ main()
third_party/Puppeteer/skeleton/data_utils/save_npz.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ This Python script shows how we process the meshes and rigs from the input folders and save them in a compressed NPZ file.
16
+ """
17
+ import os
18
+ import numpy as np
19
+ import glob
20
+ import pickle
21
+ from concurrent.futures import ProcessPoolExecutor
22
+ import skimage.measure
23
+ import trimesh
24
+ import mesh2sdf.core
25
+ import scipy.sparse as sp
26
+
27
+ def read_obj_file(file_path):
28
+ vertices = []
29
+ faces = []
30
+ normals = [] # Added normals list
31
+
32
+ with open(file_path, 'r') as file:
33
+ for line in file:
34
+ if line.startswith('v '):
35
+ parts = line.split()[1:]
36
+ vertices.append([float(parts[0]), float(parts[1]), float(parts[2])])
37
+ elif line.startswith('vn '): # Added reading normals
38
+ parts = line.split()[1:]
39
+ normals.append([float(parts[0]), float(parts[1]), float(parts[2])])
40
+ elif line.startswith('f '):
41
+ parts = line.split()[1:]
42
+ # OBJ format is 1-based, we need 0-based for npz
43
+ face = [int(part.split('//')[0]) - 1 for part in parts]
44
+ faces.append(face)
45
+
46
+ return np.array(vertices), np.array(faces), np.array(normals)
47
+
48
+ def read_rig_file(file_path):
49
+ """
50
+ Read rig from txt file, our format is the same as RigNet:
51
+ joints joint_name x y z
52
+ root root_joint_name
53
+ skin vertex_idx joint_name weight joint_name weight ...
54
+ hier parent_joint_name child_joint_name
55
+ """
56
+ joints = []
57
+ bones = []
58
+ joint_names = []
59
+
60
+ joint_mapping = {}
61
+ joint_index = 0
62
+
63
+ skinning_data = {} # Dictionary to store vertex index -> [(joint_idx, weight), ...]
64
+
65
+ with open(file_path, 'r') as file:
66
+ lines = file.readlines()
67
+
68
+ for line in lines:
69
+ parts = line.split()
70
+ if line.startswith('joints'):
71
+ name = parts[1]
72
+ position = [float(parts[2]), float(parts[3]), float(parts[4])]
73
+ joints.append(position)
74
+ joint_names.append(name)
75
+ joint_mapping[name] = joint_index
76
+ joint_index += 1
77
+ elif line.startswith('hier'):
78
+ parent_joint = joint_mapping[parts[1]]
79
+ child_joint = joint_mapping[parts[2]]
80
+ bones.append([parent_joint, child_joint])
81
+ elif line.startswith('root'):
82
+ root = joint_mapping[parts[1]]
83
+ elif line.startswith('skin'):
84
+ vertex_idx = int(parts[1])
85
+
86
+ if vertex_idx not in skinning_data:
87
+ skinning_data[vertex_idx] = []
88
+
89
+ for i in range(2, len(parts), 2):
90
+ if i+1 < len(parts):
91
+ joint_name = parts[i]
92
+ weight = float(parts[i+1])
93
+
94
+ if joint_name in joint_mapping:
95
+ joint_idx = joint_mapping[joint_name]
96
+ skinning_data[vertex_idx].append((joint_idx, weight))
97
+
98
+ return np.array(joints), np.array(bones), root, joint_names, skinning_data
99
+
100
+ def convert_to_sparse_skinning(skinning_data, num_vertices, num_joints):
101
+ """Convert skinning weights to sparse matrix format."""
102
+ rows = []
103
+ cols = []
104
+ data = []
105
+
106
+ for vertex_idx, weights in skinning_data.items():
107
+ for joint_idx, weight in weights:
108
+ rows.append(vertex_idx)
109
+ cols.append(joint_idx)
110
+ data.append(weight)
111
+
112
+ sparse_skinning = sp.coo_matrix((data, (rows, cols)), shape=(num_vertices, num_joints))
113
+
114
+ # Return as tuple of arrays which can be serialized
115
+ return (sparse_skinning.data, sparse_skinning.row, sparse_skinning.col, sparse_skinning.shape)
116
+
117
+ def normalize_to_unit_cube(vertices, normals=None, scale_factor=1.0):
118
+ min_coords = vertices.min(axis=0)
119
+ max_coords = vertices.max(axis=0)
120
+ center = (max_coords + min_coords) / 2.0
121
+
122
+ vertices -= center
123
+ scale = 1.0 / np.abs(vertices).max() * scale_factor
124
+ vertices *= scale
125
+
126
+ if normals is not None:
127
+ # Normalize each normal vector to unit length
128
+ norms = np.linalg.norm(normals, axis=1, keepdims=True)
129
+ normals = normals / (norms+1e-8)
130
+
131
+ return vertices, normals, center, scale
132
+ else:
133
+ return vertices, center, scale
134
+
135
+ def normalize_vertices(vertices, scale=0.9):
136
+ bbmin, bbmax = vertices.min(0), vertices.max(0)
137
+ center = (bbmin + bbmax) * 0.5
138
+ scale = 2.0 * scale / (bbmax - bbmin).max()
139
+ vertices = (vertices - center) * scale
140
+ return vertices, center, scale
141
+
142
+ def export_to_watertight(normalized_mesh, octree_depth: int = 7):
143
+ """
144
+ Convert the non-watertight mesh to watertight.
145
+
146
+ Args:
147
+ normalized_mesh (trimesh.Trimesh): input mesh, already normalized
148
+ octree_depth (int): resolution of the SDF grid (size = 2 ** octree_depth)
149
+
150
+ Returns:
151
+ mesh(trimesh.Trimesh): watertight mesh
152
+
153
+ """
154
+ size = 2 ** octree_depth
155
+ level = 2 / size
156
+
157
+ scaled_vertices, to_orig_center, to_orig_scale = normalize_vertices(normalized_mesh.vertices)
158
+
159
+ sdf = mesh2sdf.core.compute(scaled_vertices, normalized_mesh.faces, size=size)
160
+
161
+ vertices, faces, normals, _ = skimage.measure.marching_cubes(np.abs(sdf), level)
162
+
163
+ # watertight mesh
164
+ vertices = vertices / size * 2 - 1 # -1 to 1
165
+ vertices = vertices / to_orig_scale + to_orig_center
166
+ mesh = trimesh.Trimesh(vertices, faces, normals=normals)
167
+
168
+ return mesh
169
+
170
+ def process_mesh_to_pc(mesh, marching_cubes = True, sample_num = 8192):
171
+ if marching_cubes:
172
+ mesh = export_to_watertight(mesh)
173
+ return_mesh = mesh
174
+ points, face_idx = mesh.sample(sample_num, return_index=True)
175
+ points, _, _ = normalize_to_unit_cube(points, scale_factor=0.9995)
176
+ normals = mesh.face_normals[face_idx]
177
+
178
+ pc_normal = np.concatenate([points, normals], axis=-1, dtype=np.float16)
179
+ return pc_normal, return_mesh
180
+
181
+ def process_single_file(args):
182
+ mesh_file, rig_file = args
183
+ mesh_name = os.path.basename(mesh_file).split('.')[0]
184
+ rig_name = os.path.basename(rig_file).split('.')[0]
185
+
186
+ if mesh_name != rig_name:
187
+ print(f"Skipping files {mesh_file} and {rig_file} because their names do not match.")
188
+ return None
189
+
190
+ vertices, faces, normals = read_obj_file(mesh_file)
191
+
192
+ joints, bones, root, joint_names, skinning_data = read_rig_file(rig_file)
193
+
194
+ # Normalize the mesh to the unit cube centered at the origin
195
+ vertices, normals, center, scale = normalize_to_unit_cube(vertices, normals, scale_factor=0.5)
196
+
197
+ # Apply the same transformation to joints
198
+ joints -= center
199
+ joints *= scale
200
+
201
+ # Create trimesh object for processing
202
+ mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
203
+
204
+ # Process into point cloud with normals
205
+ pc_normal, _ = process_mesh_to_pc(mesh)
206
+
207
+ # Convert skinning data to sparse format
208
+ sparse_skinning = convert_to_sparse_skinning(skinning_data, len(vertices), len(joints))
209
+
210
+ return {
211
+ 'vertices': vertices,
212
+ 'faces': faces,
213
+ 'normals': normals,
214
+ 'joints': joints,
215
+ 'bones': bones,
216
+ 'root_index': root,
217
+ 'uuid': mesh_name,
218
+ 'pc_w_norm': pc_normal,
219
+ 'joint_names': joint_names,
220
+ 'skinning_weights_value': sparse_skinning[0], # values
221
+ 'skinning_weights_rows': sparse_skinning[1], # row indices
222
+ 'skinning_weights_cols': sparse_skinning[2], # column indices
223
+ 'skinning_weights_shape': sparse_skinning[3] # shape of matrix
224
+ }
225
+
226
+ def process_files(mesh_folder, rig_folder, output_file, num_workers=8):
227
+ file_pairs = []
228
+
229
+ for root, _, files in os.walk(rig_folder):
230
+ for file in files:
231
+ if file.endswith('.txt'):
232
+ rig_file = os.path.join(root, file)
233
+ obj_base_name = os.path.splitext(file)[0]
234
+ mesh_file = os.path.join(mesh_folder, obj_base_name + '.obj')
235
+ if os.path.exists(mesh_file):
236
+ file_pairs.append((mesh_file, rig_file))
237
+ else:
238
+ print(f"Mesh file not found: {mesh_file}")
239
+
240
+ with ProcessPoolExecutor(max_workers=num_workers) as executor:
241
+ data_list = list(executor.map(process_single_file, file_pairs))
242
+
243
+ data_list = [data for data in data_list if data is not None]
244
+
245
+ np.savez_compressed(output_file, data_list, allow_pickle=True)
246
+
247
+ def main():
248
+ # Example usage
249
+ mesh_folder = 'meshes/'
250
+ rig_folder = 'rigs/'
251
+ output_file = 'results.npz'
252
+
253
+ process_files(mesh_folder, rig_folder, output_file)
254
+
255
+ if __name__ == "__main__":
256
+ main()
third_party/Puppeteer/skeleton/demo.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import os
15
+ import torch
16
+ import trimesh
17
+ import argparse
18
+ import numpy as np
19
+
20
+ from tqdm import tqdm
21
+ from trimesh import Scene
22
+
23
+ from accelerate import Accelerator
24
+ from accelerate.utils import set_seed
25
+ from accelerate.utils import DistributedDataParallelKwargs
26
+
27
+ from skeleton_models.skeletongen import SkeletonGPT
28
+ from data_utils.save_npz import normalize_to_unit_cube
29
+ from utils.mesh_to_pc import MeshProcessor
30
+ from utils.save_utils import save_mesh, pred_joints_and_bones, save_skeleton_to_txt, save_skeleton_to_txt_joint, save_args, \
31
+ merge_duplicate_joints_and_fix_bones, save_skeleton_obj, render_mesh_with_skeleton
32
+
33
+ class Dataset:
34
+ def __init__(self, input_list, input_pc_num = 8192, apply_marching_cubes = True, octree_depth = 7, output_dir = None):
35
+ super().__init__()
36
+ self.data = []
37
+ self.output_dir = output_dir
38
+
39
+ mesh_list = []
40
+ for input_path in input_list:
41
+ ext = os.path.splitext(input_path)[1].lower()
42
+ if ext in ['.ply', '.stl', '.obj']:
43
+ cur_data = trimesh.load(input_path, force='mesh')
44
+ mesh_list.append(cur_data)
45
+ else:
46
+ print(f"Unsupported file type: {ext}")
47
+ if apply_marching_cubes:
48
+ print("First apply Marching Cubes and then sample point cloud, need time...")
49
+ pc_list = MeshProcessor.convert_meshes_to_point_clouds(mesh_list, input_pc_num, apply_marching_cubes = apply_marching_cubes, octree_depth = octree_depth)
50
+ for input_path, cur_data, mesh in zip(input_list, pc_list, mesh_list):
51
+ self.data.append({'pc_normal': cur_data, 'faces': mesh.faces, 'vertices': mesh.vertices, 'file_name': os.path.splitext(os.path.basename(input_path))[0]})
52
+ print(f"dataset total data samples: {len(self.data)}")
53
+
54
+ def __len__(self):
55
+ return len(self.data)
56
+
57
+ def __getitem__(self, idx):
58
+ data_dict = {}
59
+ data_dict['pc_normal'] = self.data[idx]['pc_normal']
60
+ # normalize pc coor
61
+ pc_coor = data_dict['pc_normal'][:, :3]
62
+ normals = data_dict['pc_normal'][:, 3:]
63
+ pc_coor, center, scale = normalize_to_unit_cube(pc_coor, scale_factor=0.9995)
64
+
65
+ data_dict['file_name'] = self.data[idx]['file_name']
66
+ pc_coor = pc_coor.astype(np.float32)
67
+ normals = normals.astype(np.float32)
68
+
69
+ point_cloud = trimesh.PointCloud(pc_coor)
70
+ point_cloud.metadata['normals'] = normals
71
+
72
+ try:
73
+ point_cloud.export(os.path.join(self.output_dir, f"{data_dict['file_name']}.ply"))
74
+ except Exception as e:
75
+ print(f"fail to save point clouds: {e}")
76
+
77
+ assert (np.linalg.norm(normals, axis=-1) > 0.99).all(), "normals should be unit vectors, something wrong"
78
+ data_dict['pc_normal'] = np.concatenate([pc_coor, normals], axis=-1, dtype=np.float16)
79
+
80
+ vertices = self.data[idx]['vertices']
81
+ faces = self.data[idx]['faces']
82
+ bounds = np.array([pc_coor.min(axis=0), pc_coor.max(axis=0)])
83
+ pc_center = (bounds[0] + bounds[1])[None, :] / 2
84
+ pc_scale = ((bounds[1] - bounds[0]).max() + 1e-5)
85
+ data_dict['transform_params'] = torch.tensor([
86
+ center[0], center[1], center[2],
87
+ scale,
88
+ pc_center[0][0], pc_center[0][1], pc_center[0][2],
89
+ pc_scale
90
+ ], dtype=torch.float32)
91
+ data_dict['vertices'] = vertices
92
+ data_dict['faces']= faces
93
+ return data_dict
94
+
95
+ def get_args():
96
+ parser = argparse.ArgumentParser("SkeletonGPT", add_help=False)
97
+
98
+ parser.add_argument("--input_pc_num", default=8192, type=int)
99
+ parser.add_argument("--num_beams", default=1, type=int)
100
+ parser.add_argument('--input_dir', default=None, type=str, help="input mesh directory")
101
+ parser.add_argument('--input_path', default=None, type=str, help="input mesh path")
102
+ parser.add_argument("--output_dir", default="outputs", type=str)
103
+ parser.add_argument('--llm', default="facebook/opt-350m", type=str, help="The LLM backend")
104
+ parser.add_argument("--pad_id", default=-1, type=int, help="padding id")
105
+ parser.add_argument("--n_discrete_size", default=128, type=int, help="discretized 3D space")
106
+ parser.add_argument("--n_max_bones", default=100, type=int, help="max number of bones")
107
+ parser.add_argument('--dataset_path', default="combine_256_updated", type=str, help="data path")
108
+ parser.add_argument("--seed", default=0, type=int)
109
+ parser.add_argument("--precision", default="fp16", type=str)
110
+ parser.add_argument("--batchsize_per_gpu", default=1, type=int)
111
+ parser.add_argument('--pretrained_weights', default=None, type=str)
112
+ parser.add_argument('--save_name', default="infer_results", type=str)
113
+ parser.add_argument("--save_render", default=False, action="store_true", help="save rendering results of mesh with skel")
114
+ parser.add_argument("--apply_marching_cubes", default=False, action="store_true")
115
+ parser.add_argument("--octree_depth", default=7, type=int)
116
+ parser.add_argument("--hier_order", default=False, action="store_true")
117
+ parser.add_argument("--joint_token", default=False, action="store_true", help="use joint_based tokenization")
118
+ parser.add_argument("--seq_shuffle", default=False, action="store_true", help="shuffle the skeleton sequence")
119
+
120
+ args = parser.parse_args()
121
+ return args
122
+
123
+ if __name__ == "__main__":
124
+ args = get_args()
125
+
126
+ output_dir = f'{args.output_dir}/{args.save_name}'
127
+ os.makedirs(output_dir, exist_ok=True)
128
+ save_args(args, output_dir)
129
+
130
+ kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
131
+ accelerator = Accelerator(
132
+ kwargs_handlers=[kwargs],
133
+ mixed_precision=args.precision,
134
+ )
135
+
136
+ model = SkeletonGPT(args).cuda()
137
+
138
+ if args.pretrained_weights is not None:
139
+ pkg = torch.load(args.pretrained_weights, map_location=torch.device("cpu"))
140
+ model.load_state_dict(pkg["model"])
141
+ else:
142
+ raise ValueError("Pretrained weights must be provided.")
143
+ model.eval()
144
+ set_seed(args.seed)
145
+
146
+ # create dataset
147
+ if args.input_dir is not None:
148
+ input_list = sorted(os.listdir(args.input_dir))
149
+ input_list = [os.path.join(args.input_dir, x) for x in input_list if x.endswith('.ply') or x.endswith('.obj') or x.endswith('.stl')]
150
+ dataset = Dataset(input_list, args.input_pc_num, args.apply_marching_cubes, args.octree_depth, output_dir)
151
+ elif args.input_path is not None:
152
+ dataset = Dataset([args.input_path], args.input_pc_num, args.apply_marching_cubes, args.octree_depth, output_dir)
153
+ else:
154
+ raise ValueError("input_dir or input_path must be provided.")
155
+
156
+ dataloader = torch.utils.data.DataLoader(
157
+ dataset,
158
+ batch_size=1,
159
+ drop_last=False,
160
+ shuffle=False,
161
+ )
162
+
163
+ dataloader, model = accelerator.prepare(dataloader, model)
164
+
165
+ for curr_iter, batch_data_label in tqdm(enumerate(dataloader), total=len(dataloader)):
166
+ with accelerator.autocast():
167
+ pred_bone_coords = model.generate(batch_data_label)
168
+
169
+ # determine the output file name
170
+ file_name = os.path.basename(batch_data_label['file_name'][0])
171
+ pred_skel_filename = os.path.join(output_dir, f'{file_name}_skel.obj')
172
+ pred_rig_filename = os.path.join(output_dir, f"{file_name}_pred.txt")
173
+ mesh_filename = os.path.join(output_dir, f"{file_name}_mesh.obj")
174
+
175
+ transform_params = batch_data_label['transform_params'][0].cpu().numpy()
176
+ trans = transform_params[:3]
177
+ scale = transform_params[3]
178
+ pc_trans = transform_params[4:7]
179
+ pc_scale = transform_params[7]
180
+ vertices = batch_data_label['vertices'][0].cpu().numpy()
181
+ faces = batch_data_label['faces'][0].cpu().numpy()
182
+
183
+ skeleton = pred_bone_coords[0].cpu().numpy()
184
+ pred_joints, pred_bones = pred_joints_and_bones(skeleton.squeeze())
185
+
186
+ # Post process: merge duplicate or nearby joints and deduplicate bones.
187
+ if args.hier_order: # for MagicArticulate hier order
188
+ pred_root_index = pred_bones[0][0]
189
+ pred_joints, pred_bones, pred_root_index = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones, root_index=pred_root_index)
190
+ else: # for Puppeteer or MagicArticulate spatial order
191
+ pred_joints, pred_bones = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones)
192
+ pred_root_index = None
193
+
194
+ # when saving the rig to txt, denormalize the skeletons to the same scale as the input meshes
195
+ pred_joints_denorm = pred_joints * pc_scale + pc_trans # first align with point cloud
196
+ pred_joints_denorm = pred_joints_denorm / scale + trans # then align with original mesh
197
+
198
+ if args.joint_token:
199
+ pred_root_index = save_skeleton_to_txt_joint(pred_joints_denorm, pred_bones, pred_rig_filename)
200
+ else:
201
+ save_skeleton_to_txt(pred_joints_denorm, pred_bones, pred_root_index, args.hier_order, vertices, pred_rig_filename)
202
+
203
+ # save skeletons
204
+ if args.hier_order or args.joint_token:
205
+ save_skeleton_obj(pred_joints, pred_bones, pred_skel_filename, pred_root_index, use_cone=True)
206
+ else:
207
+ save_skeleton_obj(pred_joints, pred_bones, pred_skel_filename, use_cone=False)
208
+
209
+ # when saving the mesh and rendering, use normalized vertices in (-0.5, 0.5)
210
+ vertices_norm = (vertices - trans) * scale
211
+ vertices_norm = (vertices_norm - pc_trans) / pc_scale
212
+ save_mesh(vertices_norm, faces, mesh_filename)
213
+
214
+ # render mesh w/ skeleton
215
+ if args.save_render:
216
+ if args.hier_order or args.joint_token:
217
+ render_mesh_with_skeleton(pred_joints, pred_bones, vertices_norm, faces, output_dir, file_name, prefix='pred', root_idx=pred_root_index)
218
+ else:
219
+ render_mesh_with_skeleton(pred_joints, pred_bones, vertices_norm, faces, output_dir, file_name, prefix='pred')
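
For reference, the mapping encoded in `transform_params` above is a two-stage transform: predictions live in the normalized point-cloud frame, are first de-centered and re-scaled with (`pc_center`, `pc_scale`), and then mapped back to the original mesh frame with (`center`, `scale`). The snippet below is a minimal, self-contained sketch of that round trip; it is not part of the repository, and the helper names and test values are illustrative only.

import numpy as np

# Hypothetical helpers mirroring the normalization in demo.py:
# mesh frame -> normalized point-cloud frame, and its inverse.
def normalize(points, trans, scale, pc_trans, pc_scale):
    p = (points - trans) * scale          # same form as vertices_norm above
    return (p - pc_trans) / pc_scale

def denormalize(points, trans, scale, pc_trans, pc_scale):
    p = points * pc_scale + pc_trans      # same form as pred_joints_denorm above
    return p / scale + trans

# Round-trip check with made-up transform parameters.
rng = np.random.default_rng(0)
joints = rng.uniform(-0.5, 0.5, size=(4, 3))
trans, scale = np.array([0.1, -0.2, 0.3]), 1.7
pc_trans, pc_scale = np.array([0.0, 0.05, -0.01]), 0.9
restored = normalize(denormalize(joints, trans, scale, pc_trans, pc_scale),
                     trans, scale, pc_trans, pc_scale)
assert np.allclose(restored, joints)
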
third_party/Puppeteer/skeleton/demo.sh ADDED
@@ -0,0 +1,19 @@
1
+ CUDA_VISIBLE_DEVICES=0 python demo.py --input_dir ./examples \
2
+ --pretrained_weights skeleton_ckpts/puppeteer_skeleton_w_diverse_pose.pth \
3
+ --save_name infer_results_demo --input_pc_num 8192 \
4
+ --save_render --apply_marching_cubes --joint_token --seq_shuffle
5
+
6
+ # If you find the results unsatisfactory, try the model trained with bone-based tokenization:
7
+
8
+ # CUDA_VISIBLE_DEVICES=0 python demo.py --input_dir ./examples \
9
+ # --pretrained_weights skeleton_ckpts/puppeteer_skeleton_w_diverse_pose_bone_token.pth \
10
+ # --save_name infer_results_demo_bone_token --input_pc_num 8192 \
11
+ # --save_render --apply_marching_cubes --hier_order --seq_shuffle
12
+
13
+
14
+ # If you want to run the demo using MagicArticulate weights, run:
15
+
16
+ # CUDA_VISIBLE_DEVICES=0 python demo.py --input_dir ./examples \
17
+ # --pretrained_weights skeleton_ckpts/checkpoint_trainonv2_hier.pth \
18
+ # --save_name infer_results_demo_magicarti --input_pc_num 8192 \
19
+ # --save_render --apply_marching_cubes --hier_order
third_party/Puppeteer/skeleton/download.py ADDED
@@ -0,0 +1,25 @@
1
+ from huggingface_hub import hf_hub_download
2
+
3
+ file_path = hf_hub_download(
4
+ repo_id="Maikou/Michelangelo",
5
+ filename="checkpoints/aligned_shape_latents/shapevae-256.ckpt",
6
+ local_dir="third_partys/Michelangelo"
7
+ )
8
+
9
+ file_path = hf_hub_download(
10
+ repo_id="Seed3D/Puppeteer",
11
+ filename="skeleton_ckpts/puppeteer_skeleton_w_diverse_pose.pth",
12
+ local_dir="skeleton"
13
+ )
14
+
15
+ file_path = hf_hub_download(
16
+ repo_id="Seed3D/Puppeteer",
17
+ filename="skeleton_ckpts/puppeteer_skeleton_wo_diverse_pose.pth",
18
+ local_dir="skeleton"
19
+ )
20
+
21
+ file_path = hf_hub_download(
22
+ repo_id="Seed3D/Puppeteer",
23
+ filename="skeleton_ckpts/puppeteer_skeleton_w_diverse_pose_bone_token.pth",
24
+ local_dir="skeleton"
25
+ )
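
After the downloads finish, a quick way to confirm a checkpoint has the layout demo.py expects (a dict with a "model" key holding the state dict) is a small sanity check like the sketch below. This is not part of the repository; the path simply reflects the `local_dir` used above and may need adjusting depending on the working directory.

import torch

# Assumed location: hf_hub_download(..., local_dir="skeleton") places the file here.
ckpt_path = "skeleton/skeleton_ckpts/puppeteer_skeleton_w_diverse_pose.pth"

pkg = torch.load(ckpt_path, map_location="cpu")  # same call demo.py uses
assert "model" in pkg, "unexpected checkpoint layout"
print(f"loaded state dict with {len(pkg['model'])} tensors")
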