diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..53aee21b9a286bf9d1904e9527c3f0b047f4f0ea
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..6e51e21cb19763592c6a71cf2a6e19ea2f77f2be
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+
+# local test scripts
+*test_puppeteer_api*.ps1
+logs/
+results/
+
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..1f29267615017f7c437df9c178c8403b9d1aeaac
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,85 @@
+# ============================================================
+# Puppeteer GPU API Dockerfile (Final, CUDA 11.8 + A10G Ready)
+# ============================================================
+
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
+
+# ------------------------------------------------------------
+# OS Dependencies
+# ------------------------------------------------------------
+RUN apt-get update && apt-get install -y \
+ python3 python3-pip python3-venv \
+ git wget curl unzip ffmpeg bash \
+ libgl1 libglib2.0-0 \
+ && rm -rf /var/lib/apt/lists/* \
+ && ln -sf /usr/bin/python3 /usr/bin/python
+
+# ------------------------------------------------------------
+# Environment Variables
+# ------------------------------------------------------------
+ENV PIP_NO_CACHE_DIR=1 \
+ PYTHONUNBUFFERED=1 \
+    # Suppress OMP warnings and pin to a single thread (works around libgomp errors)
+ OMP_NUM_THREADS=1 \
+ MKL_THREADING_LAYER=SEQUENTIAL \
+    # Input/output paths
+ TMP_IN_DIR=/data/in \
+ RESULT_DIR=/data/results
+
+RUN python -m pip install --upgrade pip
+
+# ------------------------------------------------------------
+# Build Cache Busting (optional, force rebuild)
+# ------------------------------------------------------------
+ARG CACHE_BUST=2025-11-05-01-30
+
+# ------------------------------------------------------------
+# Work Directory
+# ------------------------------------------------------------
+WORKDIR /app
+
+# ------------------------------------------------------------
+# Python Dependencies (Torch excluded; installed separately below)
+# ------------------------------------------------------------
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+
+# ------------------------------------------------------------
+# Install CUDA-enabled PyTorch (cu118 wheels)
+# ------------------------------------------------------------
+RUN pip uninstall -y torch torchvision torchaudio || true
+RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu118 \
+ torch torchvision torchaudio
+
+# ------------------------------------------------------------
+# App Source
+# ------------------------------------------------------------
+COPY app.py /app/app.py
+
+# ------------------------------------------------------------
+# Puppeteer Vendor (vendored source)
+# ------------------------------------------------------------
+COPY third_party/Puppeteer /app/Puppeteer
+RUN chmod +x /app/Puppeteer/demo_rigging.sh || true
+
+# PYTHONPATH: app / Puppeteer / third_party
+ENV PYTHONPATH=/app:/app/Puppeteer:/app/Puppeteer/third_party:$PYTHONPATH
+
+# Fallback in case some code imports 'third_partys'
+RUN ln -s /app/Puppeteer/third_party /app/third_partys || true \
+ && touch /app/Puppeteer/third_party/__init__.py
+
+# ------------------------------------------------------------
+# Writable Paths
+# ------------------------------------------------------------
+RUN mkdir -p /data/in /data/results && chmod -R 777 /data
+
+# ------------------------------------------------------------
+# Entrypoint (guarantees cd /app)
+# ------------------------------------------------------------
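+# The generated run.sh import-checks app.py before launching uvicorn on ${PORT:-7860}; a failed import aborts startup.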
+RUN printf '#!/bin/bash\nset -euo pipefail\ncd /app\npython -c "import importlib, sys; import app; print(\\"[boot] app imported OK\\")" || exit 1\nuvicorn app:app --host 0.0.0.0 --port ${PORT:-7860}\n' > /app/run.sh \
+ && chmod +x /app/run.sh
+
+EXPOSE 7860
+CMD ["sh", "-c", "/app/run.sh"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..524d15077b569c9feb74ba4c1c6afe9451b92d6f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+---
+title: Puppeteer Api
+emoji: 🏆
+colorFrom: yellow
+colorTo: blue
+sdk: docker
+pinned: false
+license: mit
+short_description: puppeteer-api
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
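+
+The Space exposes a small FastAPI service (see `app.py`). Below is a minimal Python client sketch for the `/health`, `/rig`, `/list` and `/download` endpoints; the base URL and mesh URL are placeholders, and an `Authorization: Bearer <token>` header (as in `test_puppeteer_api_v2.ps1`) should be added if the Space is private.
+
+```python
+import requests
+
+BASE_URL = "https://<your-space>.hf.space"  # placeholder Space URL
+
+# 1) Check that the service is up and whether CUDA is visible
+print(requests.get(f"{BASE_URL}/health", timeout=30).json())
+
+# 2) Rig a publicly reachable mesh (obj/glb/fbx); the call blocks until rigging finishes
+rig = requests.post(
+    f"{BASE_URL}/rig",
+    json={"mesh_url": "https://example.com/model.glb", "workdir": "demo"},
+    timeout=1800,
+).json()
+print(rig["status"], rig.get("files_preview"))
+
+# 3) List result files and download the first one
+files = requests.get(f"{BASE_URL}/list", timeout=30).json()["files"]
+if files:
+    blob = requests.get(f"{BASE_URL}/download", params={"path": files[0]}, timeout=300)
+    with open(files[0].rsplit("/", 1)[-1], "wb") as f:
+        f.write(blob.content)
+```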
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..9aaab5f5a4de5fb8d2dd3a5a34c3b84010d84da8
--- /dev/null
+++ b/app.py
@@ -0,0 +1,344 @@
+import os
+import re
+import sys
+import shutil
+import subprocess
+from pathlib import Path
+from typing import List, Optional
+import importlib.util
+
+import requests
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse
+from pydantic import BaseModel, HttpUrl, Field
+
+# ----------------------------------------------------------------------------- #
+# 🔧 Pin the environment: avoid libgomp warnings/errors (invalid OMP_NUM_THREADS)
+# ----------------------------------------------------------------------------- #
+# In some containers libgomp errors out when OMP_NUM_THREADS is empty or malformed.
+# Force it to a safe integer value.
+os.environ["OMP_NUM_THREADS"] = os.environ.get("OMP_NUM_THREADS", "4")
+if not os.environ["OMP_NUM_THREADS"].isdigit():
+ os.environ["OMP_NUM_THREADS"] = "4"
+
+# ----------------------------------------------------------------------------- #
+# 🔧 Auto-install runtime dependencies (tqdm, einops, scipy, trimesh, etc.)
+# - Checked once at server startup and installed in case they are missing from requirements/Dockerfile
+# ----------------------------------------------------------------------------- #
+RUNTIME_DEPS = [
+ "tqdm",
+ "einops",
+ "scipy",
+ "trimesh",
+    "accelerate",  # added
+    "timm",        # added
+    # Extra packages below (auto-installed if errors show up)
+ "networkx",
+ "scikit-image",
+]
+
+def _need_install(mod_name: str) -> bool:
+ return importlib.util.find_spec(mod_name) is None
+
+def _pip_install(pkgs: List[str]) -> None:
+ if not pkgs:
+ return
+ try:
+ subprocess.check_call([sys.executable, "-m", "pip", "install", *pkgs])
+ except Exception as e:
+ print(f"[deps] pip install failed for {pkgs}: {e}")
+
+def _ensure_runtime_deps() -> None:
+    # numpy 2.x can clash with scipy and friends -> try downgrading to numpy<2
+ try:
+ import numpy as _np
+ if _np.__version__.startswith("2"):
+ print(f"[deps] numpy=={_np.__version__} detected; attempting to pin <2.0")
+ _pip_install(["numpy<2"])
+ except Exception as e:
+ print(f"[deps] numpy check failed: {e}")
+    # Install any missing required modules
+ missing = [m for m in RUNTIME_DEPS if _need_install(m)]
+ if missing:
+ print(f"[deps] installing missing modules: {missing}")
+ _pip_install(missing)
+    # Final verification log
+ for m in RUNTIME_DEPS:
+ print(f"[deps] {m} -> {'OK' if not _need_install(m) else 'MISSING'}")
+
+_ensure_runtime_deps()
+
+# ----------------------------------------------------------------------------- #
+# FastAPI initialization
+# ----------------------------------------------------------------------------- #
+app = FastAPI(title="Puppeteer API", version="1.0.0")
+
+# ----------------------------------------------------------------------------- #
+# Settings
+# ----------------------------------------------------------------------------- #
+PUPPETEER_SRC = Path(os.environ.get("PUPPETEER_DIR", "/app/Puppeteer"))  # read-only source tree
+PUPPETEER_RUN = Path(os.environ.get("PUPPETEER_RUN", "/tmp/puppeteer_run"))  # writable copy used at runtime
+RESULT_DIR = Path(os.environ.get("RESULT_DIR", str(PUPPETEER_RUN / "results")))  # default path for rig results
+TMP_IN_DIR = Path(os.environ.get("TMP_IN_DIR", "/tmp/in"))  # where downloaded inputs are stored
+DOWNLOAD_TIMEOUT = int(os.environ.get("DOWNLOAD_TIMEOUT", "180"))
+MAX_DOWNLOAD_MB = int(os.environ.get("MAX_DOWNLOAD_MB", "512"))
+SAFE_NAME = re.compile(r"[^A-Za-z0-9._-]+")
+# Candidate paths searched for animation/rigging results
+RESULT_BASES = [
+ Path("/app/Puppeteer/results"),
+ RESULT_DIR,
+ Path("/data/results"),
+ Path("/tmp/puppeteer_run/results"),
+]
+
+# ----------------------------------------------------------------------------- #
+# Auto-download checkpoints at runtime if they are missing
+# ----------------------------------------------------------------------------- #
+ckpt_path = Path("/app/Puppeteer/checkpoints")
+if not ckpt_path.exists() or not any(ckpt_path.iterdir()):
+ try:
+ print("[init] checkpoints missing — trying runtime download via script...")
+ subprocess.run(
+ ["bash", "-lc", "cd /app/Puppeteer && ./scripts/download_ckpt.sh"],
+ check=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ text=True,
+ )
+ print("[init] Puppeteer checkpoints downloaded successfully via script.")
+ except Exception as e:
+ print("[init] WARNING: checkpoint script failed:", e)
+ try:
+ ckpt_path.mkdir(parents=True, exist_ok=True)
+ print("[init] trying manual download from GitHub release...")
+ subprocess.run(
+ [
+ "wget",
+ "-O",
+ "/app/Puppeteer/checkpoints/rig.ckpt",
+ "https://github.com/ByteDance-Seed/Puppeteer/releases/download/v1.0.0/rig.ckpt",
+ ],
+ check=True,
+ )
+ print("[init] rig.ckpt downloaded manually.")
+ except Exception as e2:
+ print("[init] WARNING: manual checkpoint download failed:", e2)
+
+# ----------------------------------------------------------------------------- #
+# Schemas
+# ----------------------------------------------------------------------------- #
+class RigIn(BaseModel):
+ mesh_url: HttpUrl = Field(..., description="Input mesh URL (obj/glb/fbx/…)")
+ workdir: Optional[str] = Field(default=None, description="Optional work directory name")
+
+class RigOut(BaseModel):
+ status: str
+ result_dir: Optional[str] = None
+ files_preview: Optional[List[str]] = None
+ detail: Optional[str] = None
+ gpu: Optional[bool] = None
+ gpu_name: Optional[str] = None
+
+class AnimateIn(BaseModel):
+ video_url: HttpUrl = Field(..., description="Input video URL (mp4, mov, etc.)")
+ mesh_path: Optional[str] = Field(
+ default="/app/Puppeteer/results/rigged.glb",
+ description="Path to rigged mesh"
+ )
+
+# ----------------------------------------------------------------------------- #
+# Utils
+# ----------------------------------------------------------------------------- #
+def ensure_dirs() -> None:
+ TMP_IN_DIR.mkdir(parents=True, exist_ok=True)
+ PUPPETEER_RUN.mkdir(parents=True, exist_ok=True)
+ RESULT_DIR.mkdir(parents=True, exist_ok=True)
+
+def prepare_run_tree() -> None:
+ if not PUPPETEER_SRC.exists():
+ raise HTTPException(status_code=500, detail=f"Puppeteer not found: {PUPPETEER_SRC}")
+ shutil.copytree(PUPPETEER_SRC, PUPPETEER_RUN, dirs_exist_ok=True)
+ script = PUPPETEER_RUN / "demo_rigging.sh"
+ if script.exists():
+ script.chmod(0o755)
+
+def safe_basename(url: str) -> str:
+ name = os.path.basename(url.split("?")[0])
+ return SAFE_NAME.sub("_", name) or "input_mesh"
+
+def download_with_limit(url: str, dst: Path) -> None:
+ with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT) as r:
+ r.raise_for_status()
+ total = 0
+ with open(dst, "wb") as f:
+ for chunk in r.iter_content(chunk_size=1024 * 1024):
+ if not chunk:
+ continue
+ total += len(chunk)
+ if total > MAX_DOWNLOAD_MB * 1024 * 1024:
+ raise HTTPException(status_code=413, detail="File too large")
+ f.write(chunk)
+
+def torch_info() -> tuple[bool, Optional[str]]:
+ try:
+ import torch
+ ok = torch.cuda.is_available()
+ name = torch.cuda.get_device_name(0) if ok else None
+ return ok, name
+ except Exception:
+ return False, None
+
+def scan_results(limit: int = 200) -> List[str]:
+ files: List[str] = []
+ exts = ("*.glb", "*.mp4", "*.fbx", "*.obj", "*.gltf", "*.png", "*.jpg", "*.json", "*.txt")
+ for base in RESULT_BASES:
+ if base.exists():
+ for ext in exts:
+ for p in base.rglob(ext):
+ if p.is_file():
+ files.append(str(p))
+ if len(files) >= limit:
+ return files
+ return files
+
+# ----------------------------------------------------------------------------- #
+# Routes
+# ----------------------------------------------------------------------------- #
+@app.get("/")
+def root():
+ return {"status": "ready", "service": "puppeteer-api"}
+
+@app.get("/health")
+def health():
+ gpu, name = torch_info()
+ return {"status": "ok", "cuda": gpu, "gpu": name}
+
+@app.post("/rig", response_model=RigOut)
+def rig(inp: RigIn):
+ ensure_dirs()
+ prepare_run_tree()
+
+ basename = safe_basename(str(inp.mesh_url))
+ mesh_path = TMP_IN_DIR / basename
+    _ = SAFE_NAME.sub("_", inp.workdir or "job")  # reserved, currently unused
+
+ try:
+ download_with_limit(str(inp.mesh_url), mesh_path)
+ except Exception as e:
+ raise HTTPException(status_code=400, detail=f"Download error: {e}")
+
+ script = PUPPETEER_RUN / "demo_rigging.sh"
+ cmd = ["bash", str(script), str(mesh_path)]
+ try:
+ proc = subprocess.run(
+ cmd,
+ cwd=str(PUPPETEER_RUN),
+ check=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ text=True,
+ )
+ run_log = proc.stdout[-4000:]
+ except subprocess.CalledProcessError as e:
+ snippet = (e.stdout or "")[-2000:]
+ raise HTTPException(status_code=500, detail=f"Puppeteer failed: {snippet}")
+ except FileNotFoundError:
+ raise HTTPException(status_code=500, detail="demo_rigging.sh not found")
+
+ preview = scan_results(limit=20)
+ gpu, gpu_name = torch_info()
+ return RigOut(
+ status="ok",
+ result_dir=str(RESULT_DIR),
+ files_preview=preview[:10],
+ detail=run_log if preview else "no result files found",
+ gpu=gpu,
+ gpu_name=gpu_name,
+ )
+
+@app.post("/animate")
+def animate(inp: AnimateIn):
+ """
+    Run Puppeteer's demo_animation.sh (video-guided animation).
+    Inputs: video_url (mp4) and mesh_path (defaults to rigged.glb).
+ """
+ pdir = Path("/app/Puppeteer")
+ script = pdir / "demo_animation.sh"
+ video_path = Path("/tmp/video.mp4")
+
+ if not script.exists():
+ raise HTTPException(status_code=404, detail="demo_animation.sh not found")
+
+    # -------- Download the video via requests -------- #
+ try:
+ print(f"[animate] downloading video from {inp.video_url}")
+ with requests.get(str(inp.video_url), stream=True, timeout=60) as r:
+ r.raise_for_status()
+ with open(video_path, "wb") as f:
+ for chunk in r.iter_content(chunk_size=8192):
+ if chunk:
+ f.write(chunk)
+ print(f"[animate] Video saved to {video_path}")
+ except Exception as e:
+ raise HTTPException(status_code=400, detail=f"Video download failed via requests: {e}")
+
+    # -------- Run the Puppeteer animation -------- #
+ cmd = [
+ "bash", str(script),
+ "--mesh", str(inp.mesh_path),
+ "--video", str(video_path),
+ ]
+ try:
+ proc = subprocess.run(
+ cmd,
+ cwd=str(pdir),
+ check=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ text=True,
+ )
+ output = proc.stdout[-4000:]
+ except subprocess.CalledProcessError as e:
+ raise HTTPException(status_code=500, detail=f"Animation failed: {e.stdout[-2000:]}")
+ except Exception as e:
+ raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")
+
+ anim_results = scan_results(limit=20)
+ return {
+ "status": "ok",
+ "video_used": str(inp.video_url),
+ "detail": output,
+ "files_preview": anim_results[:10],
+ }
+
+# -------- Result inspection / download utilities -------- #
+@app.get("/list")
+def list_results():
+ files = scan_results(limit=500)
+ return {"count": len(files), "files": files}
+
+@app.get("/download")
+def download(path: str):
+ p = Path(path).resolve()
+    # Only allow files inside the known result directories
+    # (a plain startswith() prefix check would also match siblings such as /data/results_x)
+    if not any(p.is_relative_to(b.resolve()) for b in RESULT_BASES):
+ raise HTTPException(status_code=400, detail="invalid path")
+ if not p.exists() or not p.is_file():
+ raise HTTPException(status_code=404, detail="file not found")
+ return FileResponse(str(p), filename=p.name)
+
+@app.get("/debug")
+def debug():
+ pdir = Path("/app/Puppeteer")
+ script = pdir / "demo_rigging.sh"
+ ckpt_dir = pdir / "checkpoints"
+ req_file = pdir / "requirements.txt"
+ return {
+ "script_exists": script.exists(),
+ "ckpt_dir_exists": ckpt_dir.exists(),
+ "req_exists": req_file.exists(),
+ "ckpt_samples": [str(p) for p in ckpt_dir.glob("**/*")][:15],
+ "tmp_in": os.environ.get("TMP_IN_DIR", "/data/in"),
+ "result_dir": os.environ.get("RESULT_DIR", "/data/results"),
+ "omp_num_threads": os.environ.get("OMP_NUM_THREADS"),
+ }
diff --git a/app_backup.py b/app_backup.py
new file mode 100644
index 0000000000000000000000000000000000000000..89ab73536e4e90012810e5a7fe7813c78cb76356
--- /dev/null
+++ b/app_backup.py
@@ -0,0 +1,52 @@
+import os, subprocess, requests
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+app = FastAPI()
+
+class RigIn(BaseModel):
+    mesh_url: str  # input file URL (obj, glb, fbx, etc.)
+
+@app.get("/")
+def root():
+ return {"message": "Puppeteer API (GPU) ready"}
+
+@app.get("/health")
+def health():
+ try:
+ import torch
+ gpu = torch.cuda.is_available()
+ name = torch.cuda.get_device_name(0) if gpu else None
+ return {"status": "ok", "cuda": gpu, "gpu": name}
+ except Exception as e:
+ return {"status": "ok", "cuda": False, "detail": str(e)}
+
+@app.post("/rig")
+def rig(inp: RigIn):
+ os.makedirs("/tmp/in", exist_ok=True)
+ mesh_path = os.path.join("/tmp/in", os.path.basename(inp.mesh_url))
+
+    # 1️⃣ Download the input file
+ with requests.get(inp.mesh_url, stream=True) as r:
+ r.raise_for_status()
+ with open(mesh_path, "wb") as f:
+ for chunk in r.iter_content(chunk_size=8192):
+ if chunk:
+ f.write(chunk)
+
+    # 2️⃣ Run Puppeteer
+ workdir = "/app/Puppeteer"
+ cmd = ["bash", "demo_rigging.sh", mesh_path]
+ try:
+ subprocess.run(cmd, cwd=workdir, check=True)
+ except subprocess.CalledProcessError as e:
+ return {"status": "error", "detail": str(e)}
+
+    # 3️⃣ Return the result listing
+ result_dir = os.path.join(workdir, "results")
+ files = []
+ for rootdir, _, filenames in os.walk(result_dir):
+ for fn in filenames:
+ files.append(os.path.join(rootdir, fn))
+ if len(files) >= 20: break
+ return {"status": "ok", "result_dir": result_dir, "files_preview": files[:10]}
\ No newline at end of file
diff --git a/app_backup_encoding.py b/app_backup_encoding.py
new file mode 100644
index 0000000000000000000000000000000000000000..a76d16d748100e5806a72cb73e2531703eab3cb8
--- /dev/null
+++ b/app_backup_encoding.py
@@ -0,0 +1,52 @@
+import os, subprocess, requests
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+app = FastAPI()
+
+class RigIn(BaseModel):
+혻 혻 mesh_url: str혻 # ?낅젰 ?뚯씪 URL (obj, glb, fbx ??
+
+@app.get("/")
+def root():
+혻 혻 return {"message": "Puppeteer API (GPU) ready"}
+
+@app.get("/health")
+def health():
+혻 혻 try:
+혻 혻 혻 혻 import torch
+혻 혻 혻 혻 gpu = torch.cuda.is_available()
+혻 혻 혻 혻 name = torch.cuda.get_device_name(0) if gpu else None
+혻 혻 혻 혻 return {"status": "ok", "cuda": gpu, "gpu": name}
+혻 혻 except Exception as e:
+혻 혻 혻 혻 return {"status": "ok", "cuda": False, "detail": str(e)}
+
+@app.post("/rig")
+def rig(inp: RigIn):
+혻 혻 os.makedirs("/tmp/in", exist_ok=True)
+혻 혻 mesh_path = os.path.join("/tmp/in", os.path.basename(inp.mesh_url))
+
+혻 혻 # 1截뤴깵 ?낅젰 ?뚯씪 ?ㅼ슫濡쒕뱶
+혻 혻 with requests.get(inp.mesh_url, stream=True) as r:
+혻 혻 혻 혻 r.raise_for_status()
+혻 혻 혻 혻 with open(mesh_path, "wb") as f:
+혻 혻 혻 혻 혻 혻 for chunk in r.iter_content(chunk_size=8192):
+혻 혻 혻 혻 혻 혻 혻 혻 if chunk:
+혻 혻 혻 혻 혻 혻 혻 혻 혻 혻 f.write(chunk)
+
+혻 혻 # 2截뤴깵 Puppeteer ?ㅽ뻾
+혻 혻 workdir = "/app/Puppeteer"
+혻 혻 cmd = ["bash", "demo_rigging.sh", mesh_path]
+혻 혻 try:
+혻 혻 혻 혻 subprocess.run(cmd, cwd=workdir, check=True)
+혻 혻 except subprocess.CalledProcessError as e:
+혻 혻 혻 혻 return {"status": "error", "detail": str(e)}
+
+혻 혻 # 3截뤴깵 寃곌낵 紐⑸줉 諛섑솚
+혻 혻 result_dir = os.path.join(workdir, "results")
+혻 혻 files = []
+혻 혻 for rootdir, _, filenames in os.walk(result_dir):
+혻 혻 혻 혻 for fn in filenames:
+혻 혻 혻 혻 혻 혻 files.append(os.path.join(rootdir, fn))
+혻 혻 혻 혻 혻 혻 if len(files) >= 20: break
+혻 혻 return {"status": "ok", "result_dir": result_dir, "files_preview": files[:10]}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..db9e9aa7cb87e53a3de5ab2a1acac113b3ea7907
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,27 @@
+# server
+fastapi==0.115.5
+uvicorn[standard]==0.34.0
+pydantic==2.9.2
+requests==2.32.3
+
+# numeric stack (pin to avoid ABI woes)
+numpy<2
+scipy==1.11.4
+
+# geometry / images
+trimesh==4.4.9
+networkx==3.3
+scikit-image==0.24.0
+opencv-python-headless
+# training/runtime utils
+tqdm==4.66.5
+einops==0.8.0
+accelerate==1.0.1
+timm==1.0.9
+
+# Hugging Face stack for skeleton step
+transformers==4.44.2
+tokenizers>=0.14.0
+safetensors>=0.4.2
+huggingface-hub>=0.23.0
+
diff --git a/samples/demo.mp4 b/samples/demo.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..ea9e57a7c66618bd83e468f8bdfa5dedc2b11c48
--- /dev/null
+++ b/samples/demo.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e66e01296a4984841baaf0b9542aed07a5d5eb84958135a8d612b9ff1ec9419
+size 574823
diff --git a/test_puppeteer_api_v2.ps1 b/test_puppeteer_api_v2.ps1
new file mode 100644
index 0000000000000000000000000000000000000000..20fd49332ad9b82f0392e234dec0273129e4ed6e
--- /dev/null
+++ b/test_puppeteer_api_v2.ps1
@@ -0,0 +1,57 @@
+param(
+ [string]$BaseUrl = "https://seungminkwak-puppeteer-api.hf.space",
+ [string]$Token = "",
+ [string]$MeshUrl = "https://cdn.jsdelivr.net/gh/KhronosGroup/glTF-Sample-Models@master/2.0/CesiumMan/glTF-Binary/CesiumMan.glb",
+ [string]$Workdir = "job-cesium",
+ [int]$MaxTries = 12,
+ [int]$DelaySec = 10
+)
+
+$ErrorActionPreference = "Stop"
+
+Write-Host "=== Puppeteer API quick test ==="
+Write-Host "[1] /health"
+$health = Invoke-RestMethod -Uri "$BaseUrl/health" -Headers @{ Authorization = "Bearer $Token" }
+$health | ConvertTo-Json -Depth 6 | Write-Host
+
+Write-Host "[2] /rig"
+$body = @{ mesh_url = $MeshUrl; workdir = $Workdir } | ConvertTo-Json -Depth 6
+$resp = Invoke-RestMethod -Uri "$BaseUrl/rig" -Headers @{ Authorization = "Bearer $Token"; "Content-Type"="application/json" } -Method POST -Body $body
+$resp | ConvertTo-Json -Depth 6 | Write-Host
+
+Write-Host "[3] Poll /list"
+$files = @()
+for ($i=1; $i -le $MaxTries; $i++) {
+ try {
+ $list = Invoke-RestMethod -Uri "$BaseUrl/list" -Headers @{ Authorization = "Bearer $Token" }
+        if ($list.files) {
+            $files = $list.files
+ Write-Host (" -> Found: {0}" -f ($files -join ", "))
+ break
+ } else {
+ Write-Host (" -> Try {0}/{1}: no files yet" -f $i, $MaxTries)
+ }
+ } catch {
+ Write-Host (" -> Try {0}/{1}: error {2}" -f $i, $MaxTries, $_.Exception.Message)
+ }
+ Start-Sleep -Seconds $DelaySec
+}
+
+if (-not $files -or $files.Count -eq 0) {
+ Write-Host "No result files found." -ForegroundColor Red
+ exit 2
+}
+
+# choose a file
+$preferred = "/data/results/rigged.glb"
+$target = if ($files -contains $preferred) { $preferred } else { $files[0] }
+Write-Host ("[4] Download {0}" -f $target)
+
+$enc = [uri]::EscapeDataString($target)
+$newDir = Join-Path $PWD "results"
+New-Item -ItemType Directory -Path $newDir -Force | Out-Null
+$out = Join-Path $newDir (Split-Path -Leaf $target)
+Invoke-WebRequest -Uri "$BaseUrl/download?path=$enc" -Headers @{ Authorization = "Bearer $Token" } -OutFile $out
+Write-Host ("Saved to {0}" -f $out)
+try { ii $out | Out-Null } catch {}
+Write-Host "=== Done ==="
diff --git a/third_party/Puppeteer/.gitmodules b/third_party/Puppeteer/.gitmodules
new file mode 100644
index 0000000000000000000000000000000000000000..404870469b97b4d02426f965ed82015d75334295
--- /dev/null
+++ b/third_party/Puppeteer/.gitmodules
@@ -0,0 +1,15 @@
+[submodule "animation/third_partys/ptlflow"]
+ path = animation/third_partys/ptlflow
+ url = https://github.com/ChaoyueSong/ptlflow
+[submodule "animation/third_partys/co_tracker"]
+ path = animation/third_partys/co_tracker
+ url = https://github.com/ChaoyueSong/co_tracker
+[submodule "animation/third_partys/Video_Depth_Anything"]
+ path = animation/third_partys/Video_Depth_Anything
+ url = https://github.com/ChaoyueSong/Video_Depth_Anything
+[submodule "skinning/third_partys/PartField"]
+ path = skinning/third_partys/PartField
+ url = https://github.com/ChaoyueSong/PartField
+[submodule "skeleton/third_partys/Michelangelo"]
+ path = skeleton/third_partys/Michelangelo
+ url = https://github.com/ChaoyueSong/Michelangelo/
diff --git a/third_party/Puppeteer/LICENSE b/third_party/Puppeteer/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64
--- /dev/null
+++ b/third_party/Puppeteer/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/third_party/Puppeteer/README.md b/third_party/Puppeteer/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..117ad5519ac3825d4f2cb8d72bac6c123c36963b
--- /dev/null
+++ b/third_party/Puppeteer/README.md
@@ -0,0 +1,105 @@
+
+
+
+
+Puppeteer is proposed for **automatic rigging and animation of 3D objects**. Given a 3D object, Puppeteer first automatically generates skeletal structures and skinning weights, then animates the rigged model with video guidance through a differentiable optimization pipeline. This comprehensive approach aims to enable fully automated transformation of static 3D models into dynamically animated assets, eliminating the need for manual rigging expertise and significantly streamlining 3D content creation workflows.
+
+
+
+## 🔥 News
+- Sep 09, 2025: We uploaded the [video](https://www.youtube.com/watch?v=DnKx803JHyI) for Puppeteer.
+- Sep 04, 2025: Released the inference code and [model checkpoints](https://huggingface.co/Seed3D/Puppeteer).
+- Aug 15, 2025: Released the [paper](https://arxiv.org/abs/2508.10898) of Puppeteer!
+
+
+## 🔧 Installation
+We use Python 3.10 with PyTorch 2.1.1 and CUDA 11.8. The environment and required packages can be installed as follows:
+
+```
+git clone https://github.com/ByteDance-Seed/Puppeteer.git --recursive && cd Puppeteer
+conda create -n puppeteer python==3.10.13 -y
+conda activate puppeteer
+pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1 --index-url https://download.pytorch.org/whl/cu118
+pip install -r requirements.txt
+pip install flash-attn==2.6.3 --no-build-isolation
+pip install torch-scatter -f https://data.pyg.org/whl/torch-2.1.1+cu118.html
+pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu118_pyt211/download.html
+```
+
+## 🚀 Demo
+
+We provide a complete pipeline for rigging and animating 3D models. Before running the pipeline, visit each folder (skeleton, skinning, animation) to download the necessary model checkpoints. Example data is available in the [examples](https://github.com/ByteDance-Seed/Puppeteer/tree/main/examples) folder.
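+
+If it is more convenient, the released checkpoints can also be pulled programmatically. The snippet below is only a sketch: it assumes the weights you need are hosted in the [Seed3D/Puppeteer](https://huggingface.co/Seed3D/Puppeteer) model repository linked above and simply mirrors it into a local `checkpoints/` folder; adjust the paths to match where each component expects its weights.
+
+```python
+from huggingface_hub import snapshot_download
+
+# Mirror the Puppeteer model repo into ./checkpoints
+snapshot_download(repo_id="Seed3D/Puppeteer", local_dir="checkpoints")
+```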
+
+### Rigging
+
+Given 3D meshes, we first predict the skeleton and skinning weights:
+
+```
+bash demo_rigging.sh
+```
+
+The final rig files will be saved in `results/final_rigging`. To evaluate the [skeleton](https://github.com/ByteDance-Seed/Puppeteer/tree/main/skeleton) and [skinning](https://github.com/ByteDance-Seed/Puppeteer/tree/main/skinning) components separately, refer to their respective folders.
+
+### Video-guided 3D animation
+
+To animate the rigged model using video guidance, run:
+
+```
+bash demo_animation.sh
+```
+
+The rendered 3D animation sequence from different views will be saved in `results/animation`. Refer to the [animation folder](https://github.com/ByteDance-Seed/Puppeteer/tree/main/animation) for comprehensive details on data processing and structure.
+
+
+## 😊 Acknowledgment
+
+The code builds upon [MagicArticulate](https://github.com/Seed3D/MagicArticulate), [MeshAnything](https://github.com/buaacyw/MeshAnything), [Functional Diffusion](https://1zb.github.io/functional-diffusion/), [RigNet](https://github.com/zhan-xu/RigNet), [Michelangelo](https://github.com/NeuralCarver/Michelangelo/), [PartField](https://github.com/nv-tlabs/PartField), [AnyMole](https://github.com/kwanyun/AnyMoLe) and [Lab4D](https://github.com/lab4d-org/lab4d). We gratefully acknowledge the authors for making their work publicly available.
+
+
+## 📚 Citation
+
+```
+@article{song2025puppeteer,
+ title={Puppeteer: Rig and Animate Your 3D Models},
+ author={Chaoyue Song and Xiu Li and Fan Yang and Zhongcong Xu and Jiacheng Wei and Fayao Liu and Jiashi Feng and Guosheng Lin and Jianfeng Zhang},
+ journal={arXiv preprint arXiv:2508.10898},
+ year={2025}
+}
+```
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/README.md b/third_party/Puppeteer/animation/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b6116366382339961cfa54e623c957432393d417
--- /dev/null
+++ b/third_party/Puppeteer/animation/README.md
@@ -0,0 +1,76 @@
+# 3D Animation with Video Guidance
+This repository provides a complete pipeline for generating 3D object animations with video guidance. The system includes data processing and optimization algorithms for rigging-based animation.
+
+## Overview
+The pipeline takes a rigged 3D model and a reference video, then optimizes the object's motion to match the video guidance while maintaining realistic skeletal constraints.
+
+## Prerequisites
+
+### Model Downloads
+Download the required pre-trained models:
+
+- [Video-Depth-Anything](https://huggingface.co/depth-anything/Video-Depth-Anything-Large) - For depth estimation
+- [CoTracker3](https://huggingface.co/facebook/cotracker3) - For point tracking
+
+```
+python download.py
+```
+
+### Input Data Structure
+
+Organize your input data as follows:
+```
+inputs/
+└── {seq_name}/
+ ├── objs/
+ │ ├── mesh.obj # 3D mesh geometry
+ │ ├── rig.txt # Rigging definition
+ │ ├── material.mtl # Material properties (optional)
+ │ └── texture.png # Texture maps (optional)
+ ├── first_frames/ # Rendered initial frames
+ ├── imgs/ # Extracted video frames
+ ├── flow/ # Optical flow data
+ ├── flow_vis/ # Visualized optical flow
+    ├── depth/               # Estimated depth data
+    ├── track/               # Tracked joints/vertices
+ └── input.mp4 # Source video
+```
+
+## Data Processing
+
+Given a 3D model with rigging under `inputs/{seq_name}/objs` (`mesh.obj, rig.txt`, optional `.mtl` and texture `.png`), we first render the object from a specified viewpoint. This image is used as the input (first frame) to the video generation model (e.g., [Jimeng AI](https://jimeng.jianying.com/ai-tool/home?type=video)).
+
+```
+python utils/render_first_frame.py --input_path inputs --seq_name {seq_name}
+```
+Replace `{seq_name}` with your sequence name. The first-frame images are saved to `inputs/{seq_name}/first_frames`. This generates reference images from 4 different viewpoints (you can add more). Choose the viewpoint that best shows the object's joints and key parts for optimal animation results. Save the generated videos to `inputs/{seq_name}/input.mp4`.
+
+Then we extract the frames from the video by running:
+
+```
+cd inputs/{seq_name}; mkdir imgs
+ffmpeg -i input.mp4 -vf fps=10 imgs/frame_%04d.png
+cd ../../
+```
+
+Estimate optical flows by running:
+
+```
+python utils/save_flow.py --input_path inputs --seq_name {seq_name}
+```
+The flow `.flo` files are saved to `inputs/{seq_name}/flow`, and the flow visualizations are saved to `inputs/{seq_name}/flow_vis`. Depth and tracking information are saved during optimization.
+
+## Optimization
+
+To optimize the animation, you can run
+
+```
+bash demo.sh
+```
+
+The results are saved to `results/{seq_name}/{save_name}`. Modify `--main_renderer` and `--additional_renderers` to change rendering viewpoints. If animations exhibit jitter or instability, increase the root/joint smoothing weights for better temporal consistency.
+
+
+## TODO
+
+- [ ] Add multi-view supervisions.
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/demo.sh b/third_party/Puppeteer/animation/demo.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3092f797d7a91cd568bfdb8697a0ff80de1d9dc1
--- /dev/null
+++ b/third_party/Puppeteer/animation/demo.sh
@@ -0,0 +1,7 @@
+python optimization.py --save_path results --iter 200 --input_path inputs --img_size 960 \
+ --seq_name 'fish' --save_name 'fish' --coherence_weight 5
+
+# python optimization.py --save_path results --iter 200 --input_path inputs --img_size 960 \
+# --seq_name 'crocodile' --save_name 'crocodile_demo' --coherence_weight 15
+
+
diff --git a/third_party/Puppeteer/animation/download.py b/third_party/Puppeteer/animation/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..04ca0470417626a54b1b4a7b80cf0d956c2d5010
--- /dev/null
+++ b/third_party/Puppeteer/animation/download.py
@@ -0,0 +1,13 @@
+from huggingface_hub import hf_hub_download
+
+file_path = hf_hub_download(
+ repo_id="facebook/cotracker3",
+ filename="scaled_offline.pth",
+ local_dir="third_partys/co_tracker/ckpt"
+)
+
+file_path = hf_hub_download(
+ repo_id="depth-anything/Video-Depth-Anything-Large",
+ filename="video_depth_anything_vitl.pth",
+ local_dir="third_partys/Video_Depth_Anything/ckpt"
+)
diff --git a/third_party/Puppeteer/animation/model.py b/third_party/Puppeteer/animation/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..485edf79cdb183368c4c455c81cbac3f0b0f70dc
--- /dev/null
+++ b/third_party/Puppeteer/animation/model.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from typing import List, Optional, Tuple, Union
+from collections import deque
+from pytorch3d.structures import Meshes, join_meshes_as_scene
+from pytorch3d.renderer import TexturesVertex, TexturesUV
+
+from utils.quat_utils import quat_to_transform_matrix, quat_multiply, quat_rotate_vector
+
+class RiggingModel:
+ """
+ A 3D rigged model supporting skeletal animation.
+
+ Handles mesh geometry, skeletal hierarchy, skinning weights, and
+ linear blend skinning (LBS) deformation.
+ """
+ def __init__(self, device = "cuda:0"):
+ self.device = device
+ # Mesh data
+ self.vertices: List[torch.Tensor] = []
+ self.faces: List[torch.Tensor] = []
+ self.textures: List[Union[TexturesVertex, TexturesUV]] = []
+
+ # Skeletal data
+ self.bones: Optional[torch.Tensor] = None # (N, 2) [parent, child] pairs
+ self.parent_indices: Optional[torch.Tensor] = None # (J,) parent index for each joint
+ self.root_index: Optional[int] = None # Root joint index
+ self.joints_rest: Optional[torch.Tensor] = None # (J, 3) rest pose positions
+ self.skin_weights: List[torch.Tensor] = [] # List of (V_i, J) skinning weights
+
+ # Fixed local positions
+ self.rest_local_positions: Optional[torch.Tensor] = None # (J, 3)
+
+ # Computed data
+ self.bind_matrices_inv: Optional[torch.Tensor] = None # (J, 4, 4) inverse bind matrices
+ self.deformed_vertices: Optional[List[torch.Tensor]] = None # List of (T, V_i, 3)
+ self.joint_positions: Optional[torch.Tensor] = None # (T, J, 3) current joint positions
+
+ # Validation flags
+ self._bind_matrices_initialized = False
+
+ def initialize_bind_matrices(self, rest_local_pos):
+ """Initialize bind matrices and store rest local positions."""
+ self.rest_local_positions = rest_local_pos.to(self.device)
+
+ J = rest_local_pos.shape[0]
+ rest_global_quats, rest_global_pos = self.forward_kinematics(
+ torch.tensor([[[1.0, 0.0, 0.0, 0.0]] * J], device=self.device), # unit quaternion
+ self.parent_indices,
+ self.root_index
+ )
+
+ bind_matrices = quat_to_transform_matrix(rest_global_quats, rest_global_pos) # (1,J,4,4)
+ self.bind_matrices_inv = torch.inverse(bind_matrices.squeeze(0)) # (J,4,4)
+
+ self._bind_matrices_initialized = True
+
+ def animate(self, local_quaternions, root_quaternion = None, root_position = None):
+ """
+ Animate the model using local joint transformations.
+
+ Args:
+ local_quaternions: (T, J, 4) local rotations per frame
+ root_quaternion: (T, 4) global root rotation
+ root_position: (T, 3) global root translation
+ """
+ if not self._bind_matrices_initialized:
+ raise RuntimeError("Bind matrices not initialized. Call initialize_bind_matrices() first.")
+
+ # Forward kinematics
+ global_quats, global_pos = self.forward_kinematics(
+ local_quaternions,
+ self.parent_indices,
+ self.root_index
+ )
+ self.joint_positions = global_pos
+
+ joint_transforms = quat_to_transform_matrix(global_quats, global_pos) # (T, J, 4, 4)
+
+ # Apply global root transformation if provided
+ if root_quaternion is not None and root_position is not None:
+ root_transform = quat_to_transform_matrix(root_quaternion, root_position)
+ joint_transforms = root_transform[:, None] @ joint_transforms
+ self.joint_positions = joint_transforms[..., :3, 3]
+
+ # Linear blend skinning
+ self.deformed_vertices = []
+ for i, vertices in enumerate(self.vertices):
+ deformed = self._linear_blend_skinning(
+ vertices,
+ joint_transforms,
+ self.skin_weights[i],
+ self.bind_matrices_inv
+ )
+ self.deformed_vertices.append(deformed)
+
+
+ def get_mesh(self, frame_idx=None):
+ meshes = []
+ for i in range(len(self.vertices)):
+ mesh = Meshes(
+ verts=[self.vertices[i]] if frame_idx is None or self.deformed_vertices is None else [self.deformed_vertices[i][frame_idx]],
+ faces=[self.faces[i]],
+ textures=self.textures[i]
+ )
+ meshes.append(mesh)
+ return join_meshes_as_scene(meshes)
+
+ def _linear_blend_skinning(self, vertices, joint_transforms, skin_weights, bind_matrices_inv):
+ """
+ Apply linear blend skinning to vertices.
+
+ Args:
+ vertices: (V, 3) vertex positions
+ joint_transforms: (T, J, 4, 4) joint transformation matrices
+ skin_weights: (V, J) per-vertex joint weights
+ bind_matrices_inv: (J, 4, 4) inverse bind matrices
+
+ Returns:
+ (T, V, 3) deformed vertices
+ """
+ # Compute final transformation matrices
+ transforms = torch.matmul(joint_transforms, bind_matrices_inv) # (T, J, 4, 4)
+
+ # Weight and blend transformations
+ weighted_transforms = torch.einsum('vj,tjab->tvab', skin_weights, transforms) # (T, V, 4, 4)
+
+ # Apply to vertices
+ vertices_hom = torch.cat([vertices, torch.ones(vertices.shape[0], 1, device=vertices.device)], dim=-1)
+ deformed = torch.matmul(weighted_transforms, vertices_hom.unsqueeze(-1)).squeeze(-1)
+
+ return deformed[..., :3]
+
+ def forward_kinematics(self, local_quaternions, parent_indices, root_index = 0):
+ """
+ Compute global joint transformations from local ones.
+
+ Args:
+ local_quaternions: (B, J, 4) local rotations
+ parent_indices: (J,) parent index for each joint
+ root_index: Root joint index
+
+ Returns:
+ Tuple of (global_quaternions, global_positions)
+ """
+ B, J = local_quaternions.shape[:2]
+ local_positions = self.rest_local_positions.unsqueeze(0).expand(B, -1, -1)
+
+
+ # Initialize storage
+ global_quats = [None] * J
+ global_positions = [None] * J
+
+ # Build children mapping
+ children = [[] for _ in range(J)]
+ for child_idx in range(J):
+ parent_idx = parent_indices[child_idx]
+ if parent_idx >= 0:
+ children[parent_idx].append(child_idx)
+
+ # Breadth-first traversal from root
+ queue = deque([root_index])
+ visited = {root_index}
+
+ # Process root
+ global_quats[root_index] = local_quaternions[:, root_index]
+ global_positions[root_index] = local_positions[:, root_index]
+
+ while queue:
+ current = queue.popleft()
+ current_quat = global_quats[current]
+ current_pos = global_positions[current]
+
+ for child in children[current]:
+ if child not in visited:
+ visited.add(child)
+ queue.append(child)
+
+ # Transform child to global space
+ child_quat = quat_multiply(current_quat, local_quaternions[:, child])
+ child_pos = quat_rotate_vector(current_quat, local_positions[:, child]) + current_pos
+
+ global_quats[child] = child_quat
+ global_positions[child] = child_pos
+
+ return torch.stack(global_quats, dim=1), torch.stack(global_positions, dim=1)
diff --git a/third_party/Puppeteer/animation/optimization.py b/third_party/Puppeteer/animation/optimization.py
new file mode 100644
index 0000000000000000000000000000000000000000..1739a46f986135bbec4b81280accf66d760cb9bc
--- /dev/null
+++ b/third_party/Puppeteer/animation/optimization.py
@@ -0,0 +1,626 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import argparse
+import json
+import numpy as np
+import logging
+import glob
+import torch
+import torch.nn.functional as F
+from PIL import Image
+from tqdm import tqdm
+
+from renderer import MeshRenderer3D
+from model import RiggingModel
+from utils.quat_utils import (
+ compute_rest_local_positions, quat_inverse, quat_log, quat_multiply
+)
+from utils.loss_utils import (
+ DepthModule, compute_reprojection_loss, geodesic_loss, root_motion_reg,
+ calculate_flow_loss, compute_depth_loss_normalized, joint_motion_coherence
+)
+from utils.data_loader import load_model_from_obj_and_rig, prepare_depth
+from utils.save_utils import (
+ save_args, visualize_joints_on_mesh, save_final_video,
+ save_and_smooth_results, visualize_points_on_mesh, save_track_points
+)
+from utils.misc import warmup_then_decay
+from third_partys.co_tracker.save_track import save_track
+
+class AnimationOptimizer:
+ """Main class for animation optimization with video guidance."""
+
+ def __init__(self, args, device = 'cuda:0'):
+ self.args = args
+ self.device = device
+ self.logger = self._setup_logger()
+
+ # Training parameters
+ self.reinit_patience_threshold = 20
+ self.loss_divergence_factor = 2.0
+ self.gradient_clip_norm = 1.0
+
+ # Loss weights
+ self.target_ratios = {
+ 'rgb': args.rgb_wt,
+ 'flow': args.flow_wt,
+ 'proj_joint': args.proj_joint_wt,
+ 'proj_vert': args.proj_vert_wt,
+ 'depth': args.depth_wt,
+ 'mask': args.mask_wt
+ }
+ self.loss_weights = {
+ 'rgb': 1.0,
+ 'flow': 1.0,
+ 'proj_joint': 1.0,
+ 'proj_vert': 1.0,
+ 'depth': 1.0,
+ 'mask': 1.0
+ }
+
+ def _setup_logger(self):
+ """Set up logging configuration."""
+ logger = logging.getLogger("animation_optimizer")
+ logger.setLevel(logging.INFO)
+
+ if not logger.handlers:
+ formatter = logging.Formatter(
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+ )
+ console_handler = logging.StreamHandler()
+ console_handler.setFormatter(formatter)
+ logger.addHandler(console_handler)
+
+ return logger
+ def _add_file_handler(self, log_path):
+ """Add file handler to logger."""
+ file_handler = logging.FileHandler(log_path)
+ formatter = logging.Formatter("%(asctime)s %(message)s")
+ file_handler.setFormatter(formatter)
+ self.logger.addHandler(file_handler)
+
+ def _initialize_parameters(self, batch_size, num_joints):
+ """Initialize optimization parameters."""
+
+ # Fixed first frame quaternions (identity)
+ fixed_quat_0 = torch.zeros((1, num_joints, 4), device=self.device)
+ fixed_quat_0[..., 0] = 1.0
+
+ # Initialize learnable quaternions for frames 1 to B-1
+ learn_quats_init = torch.zeros((batch_size - 1, num_joints, 4), device=self.device)
+ learn_quats_init[..., 0] = 1.0
+ quats_to_optimize = learn_quats_init.clone().detach().requires_grad_(True)
+
+ # Initialize global transformations
+ fixed_global_quat_0 = torch.zeros((1, 4), device=self.device)
+ fixed_global_quat_0[:, 0] = 1.0
+ fixed_global_trans_0 = torch.zeros((1, 3), device=self.device)
+
+ # Initialize learnable global transformations
+ global_quats_init = torch.zeros((batch_size - 1, 4), device=self.device)
+ global_quats_init[:, 0] = 1.0
+ global_trans_init = torch.zeros((batch_size - 1, 3), device=self.device)
+
+ global_quats = global_quats_init.clone().detach().requires_grad_(True)
+ global_trans = global_trans_init.clone().detach().requires_grad_(True)
+
+ return quats_to_optimize, global_quats, global_trans, fixed_quat_0, fixed_global_quat_0, fixed_global_trans_0
+
+ def _setup_optimizer_and_scheduler(self, quats_to_optimize, global_quats, global_trans, n_iters):
+ """Set up optimizer and learning rate scheduler."""
+
+ base_lr = self.args.warm_lr
+ max_lr = self.args.lr
+ warmup_steps = 20
+
+ min_lr = self.args.min_lr
+ quat_lr = base_lr # *2
+
+ optimizer = torch.optim.AdamW([
+ {'params': quats_to_optimize, 'lr': quat_lr},
+ {'params': global_quats, 'lr': quat_lr},
+ {'params': global_trans, 'lr': base_lr}
+ ])
+
+ scheduler = warmup_then_decay(
+ optimizer=optimizer,
+ total_steps=n_iters,
+ warmup_steps=warmup_steps,
+ max_lr=max_lr,
+ min_lr=min_lr,
+ base_lr=base_lr
+ )
+
+ return optimizer, scheduler
+
+ def _compute_smoothness_losses(self, quats_normed, all_global_quats_normed, all_global_trans, model):
+ """Compute various smoothness losses."""
+
+ # Rotation smoothness loss using geodesic distance
+ theta = geodesic_loss(quats_normed[1:], quats_normed[:-1])
+ rot_smoothness_loss = (theta ** 2).mean()
+
+ # Second-order rotation smoothness (acceleration)
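+        # omega_t = log(q_t^{-1} * q_{t+1}) is the per-step rotation vector (an
+        # angular-velocity proxy); differencing it penalizes angular acceleration.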
+ omega = quat_log(quat_multiply(quat_inverse(quats_normed[:-1]), quats_normed[1:]))
+ rot_acc = omega[1:] - omega[:-1]
+ rot_acc_smoothness_loss = rot_acc.pow(2).mean()
+
+ # Joint motion coherence loss (parent-child relative motion smoothness)
+ joint_coherence_loss = joint_motion_coherence(quats_normed, model.parent_indices)
+
+ # Root motion regularization
+ root_pos_smooth_loss, root_quat_smooth_loss = root_motion_reg(
+ all_global_quats_normed, all_global_trans
+ )
+
+ return rot_smoothness_loss, rot_acc_smoothness_loss, joint_coherence_loss, root_pos_smooth_loss + root_quat_smooth_loss
+
+ def pre_calibrate_loss_weights(self, loss_components, target_ratios=None):
+        """Calibrate loss weights so each weighted term initially matches its target ratio relative to the RGB loss."""
+        if target_ratios is None:
+            target_ratios = self.target_ratios
+        loss_for_ratio = {name: loss.detach().clone() for name, loss in loss_components.items()}
+
+ rgb_loss = loss_for_ratio['rgb'].item()
+
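+        # Rescale each weight so that, for this snapshot of losses,
+        #   weight[name] * loss[name] ≈ target_ratios[name] * loss['rgb'],
+        # i.e. every auxiliary term starts as a controlled fraction of the RGB term.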
+ for name, loss_val in loss_for_ratio.items():
+ if name == 'rgb':
+ continue
+
+ if loss_val > 1e-8:
+ scale_factor = rgb_loss / loss_val.item()
+ target_ratio = target_ratios.get(name, 1.0)
+ new_weight = self.loss_weights.get(name, 1.0) * scale_factor * target_ratio
+
+ self.loss_weights[name] = new_weight
+
+ def _compute_losses(
+ self,
+ model,
+ renderer,
+ images_batch,
+ tracked_joints_2d,
+ joint_vis_mask,
+ track_verts_2d,
+ vert_vis_mask,
+ sampled_vertex_indices,
+ track_indices,
+ flow_dirs,
+ depth_gt_raw,
+ mask,
+ out_dir,
+ iteration
+ ):
+ """Compute all losses for the optimization."""
+
+ batch_size = images_batch.shape[0]
+ meshes = [model.get_mesh(t) for t in range(batch_size)]
+ pred_images_all = renderer.render_batch(meshes)
+
+ # 2D projection losses
+ pred_joints_3d = model.joint_positions
+ proj_joint_loss = compute_reprojection_loss(
+ renderer, joint_vis_mask, pred_joints_3d,
+ tracked_joints_2d, self.args.img_size
+ )
+
+ pred_points_3d = model.deformed_vertices[0]
+ proj_vert_loss = compute_reprojection_loss(
+ renderer, vert_vis_mask,
+ pred_points_3d[:, sampled_vertex_indices],
+ track_verts_2d[:, track_indices],
+ self.args.img_size
+ )
+
+ # RGB loss
+ pred_rgb = pred_images_all[..., :3]
+ real_rgb = images_batch[..., :3]
+ diff_rgb_masked = (pred_rgb - real_rgb) * mask.unsqueeze(-1)
+
+ mse_rgb_num = (diff_rgb_masked ** 2).sum()
+ mse_rgb_den = mask.sum() * 3
+ rgb_loss = mse_rgb_num / mse_rgb_den.clamp_min(1e-8)
+
+ # Mask loss
+        # Squeeze only the channel dim so a batch of size one keeps its batch axis
+        silhouette_soft = renderer.render_silhouette_batch(meshes).squeeze(-1)
+ mask_loss = F.binary_cross_entropy(silhouette_soft, mask)
+
+ # Depth losses
+ fragments = renderer.get_rasterization_fragments(meshes)
+ zbuf_depths = fragments.zbuf[..., 0]
+ depth_loss = compute_depth_loss_normalized(depth_gt_raw, zbuf_depths, mask)
+
+ # Flow losses
+ flow_loss = calculate_flow_loss(flow_dirs, self.device, mask, renderer, model)
+
+ loss_components = {
+ 'rgb': rgb_loss,
+ 'proj_joint': proj_joint_loss,
+ 'proj_vert': proj_vert_loss,
+ 'depth': depth_loss,
+ 'flow': flow_loss,
+ 'mask': mask_loss
+ }
+
+ return loss_components
+
+ def optimization(
+ self,
+ images_batch,
+ model,
+ renderer,
+ tracked_joints_2d,
+ joint_vis_mask,
+ track_verts_2d,
+ vert_vis_mask,
+ sampled_vertex_indices,
+ track_indices,
+ flow_dirs,
+ n_iters,
+ out_dir):
+ """
+ Optimize animation parameters with fixed first frame.
+ """
+ torch.autograd.set_detect_anomaly(True)
+
+ batch_size, _, _, _ = images_batch.shape
+ num_joints = model.joints_rest.shape[0]
+
+ # Setup output directory and logging
+ os.makedirs(out_dir, exist_ok=True)
+ log_path = os.path.join(out_dir, "optimization.log")
+ self._add_file_handler(log_path)
+
+ # Initialize parameters
+ (quats_to_optimize, global_quats, global_trans,
+ fixed_quat_0, fixed_global_quat_0, fixed_global_trans_0) = self._initialize_parameters(batch_size, num_joints)
+
+ # Setup rest positions and bind matrices
+ rest_local_pos = compute_rest_local_positions(model.joints_rest, model.parent_indices)
+ model.initialize_bind_matrices(rest_local_pos)
+
+ # Setup optimizer and scheduler
+ optimizer, scheduler = self._setup_optimizer_and_scheduler(
+ quats_to_optimize, global_quats, global_trans, n_iters
+ )
+
+ # Initialize depth module and flow weights
+ depth_module = DepthModule(
+ encoder='vitl',
+ device=self.device,
+ input_size=images_batch.shape[1],
+ fp32=False
+ )
+
+ # Prepare masks
+ real_rgb = images_batch[..., :3]
+ threshold = 0.95
+ with torch.no_grad():
+ background_mask = (real_rgb > threshold).all(dim=-1)
+ mask = (~background_mask).float()
+
+ depth_gt_raw = prepare_depth(
+ flow_dirs.replace('flow', 'depth'), real_rgb, self.device, depth_module
+ )
+
+ # Optimization tracking
+ best_loss = float('inf')
+ patience = 0
+ best_params = None
+
+ pbar = tqdm(total=n_iters, desc="Optimizing animation")
+
+ for iteration in range(n_iters):
+ # Combine fixed and learnable parameters
+ quats_all = torch.cat([fixed_quat_0, quats_to_optimize], dim=0)
+
+ # Normalize quaternions
+ reshaped = quats_all.reshape(-1, 4)
+ norm = torch.norm(reshaped, dim=1, keepdim=True).clamp_min(1e-8)
+ quats_normed = (reshaped / norm).reshape(batch_size, num_joints, 4)
+
+ # Global transformations
+ all_global_quats = torch.cat([fixed_global_quat_0, global_quats], dim=0)
+ all_global_trans = torch.cat([fixed_global_trans_0, global_trans], dim=0)
+ all_global_quats_normed = all_global_quats / torch.norm(
+ all_global_quats, dim=-1, keepdim=True
+ ).clamp_min(1e-8)
+
+ # Compute smoothness losses
+ (rot_smoothness_loss, rot_acc_smoothness_loss, joint_coherence_loss,
+ root_smooth_loss) = self._compute_smoothness_losses(
+ quats_normed, all_global_quats_normed, all_global_trans, model
+ )
+
+ # animate model
+ model.animate(quats_normed, all_global_quats_normed, all_global_trans)
+
+ # Verify first frame hasn't changed
+ verts0 = model.vertices[0]
+ de0 = model.deformed_vertices[0][0]
+ assert torch.allclose(de0, verts0, atol=1e-2), "First frame vertices have changed!"
+
+ # Compute all losses
+ loss_components = self._compute_losses(
+ model, renderer, images_batch, tracked_joints_2d, joint_vis_mask,
+ track_verts_2d, vert_vis_mask, sampled_vertex_indices, track_indices,
+ flow_dirs, depth_gt_raw, mask, out_dir, iteration
+ )
+
+ total_smoothness_loss = rot_smoothness_loss + rot_acc_smoothness_loss * 10
+
+ if iteration == 0:
+ self.pre_calibrate_loss_weights(loss_components, self.target_ratios)
+
+ total_loss = (
+ loss_components['rgb'] +
+ self.loss_weights['mask'] * loss_components['mask'] +
+ self.loss_weights['flow'] * loss_components['flow'] +
+ self.loss_weights['proj_joint'] * loss_components['proj_joint'] +
+ self.loss_weights['proj_vert'] * loss_components['proj_vert'] +
+ self.loss_weights['depth'] * loss_components['depth'] +
+ self.args.smooth_weight * total_smoothness_loss +
+ self.args.coherence_weight * joint_coherence_loss +
+ self.args.root_smooth_weight * root_smooth_loss
+ )
+
+ # Optimization step
+ optimizer.zero_grad()
+ total_loss.backward()
+ torch.nn.utils.clip_grad_norm_(
+ [quats_to_optimize, global_quats, global_trans],
+ max_norm=self.gradient_clip_norm
+ )
+ optimizer.step()
+ scheduler.step()
+
+ # Update progress bar and logging
+ loss_desc = (
+ f"Loss: {total_loss.item():.4f}, "
+ f"RGB: {loss_components['rgb'].item():.4f}, "
+ f"Mask: {self.loss_weights['mask'] * loss_components['mask'].item():.4f}, "
+ f"Flow: {self.loss_weights['flow'] * loss_components['flow'].item():.4f}, "
+ f"Proj_joint: {self.loss_weights['proj_joint'] * loss_components['proj_joint'].item():.4f}, "
+ f"Proj_vert: {self.loss_weights['proj_vert'] * loss_components['proj_vert'].item():.4f}, "
+ f"Depth: {self.loss_weights['depth'] * loss_components['depth'].item():.4f}, "
+ f"Smooth: {self.args.smooth_weight * total_smoothness_loss.item():.4f}, "
+ f"Joint smooth: {self.args.coherence_weight * joint_coherence_loss.item():.4f}, "
+ f"Root smooth: {self.args.root_smooth_weight * root_smooth_loss.item():.4f}"
+ )
+ pbar.set_description(loss_desc)
+
+ if iteration % 5 == 0:
+ self.logger.info(f"Iter {iteration}: {loss_desc}")
+
+ # Adaptive reinitialization
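+            # Keep a snapshot of the best parameters; if the loss diverges
+            # (> loss_divergence_factor x best) or stalls beyond the patience
+            # window, restart from that snapshot with a fresh optimizer and scheduler.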
+ current_loss = total_loss.item()
+ if current_loss < best_loss:
+ best_loss = current_loss
+ best_params = {
+ 'quats': quats_to_optimize.clone().detach(),
+ 'global_quats': global_quats.clone().detach(),
+ 'global_trans': global_trans.clone().detach()
+ }
+ patience = 0
+ elif (current_loss > best_loss * self.loss_divergence_factor or
+ patience > self.reinit_patience_threshold * 2):
+ # Reinitialize with best parameters
+ quats_to_optimize = best_params['quats'].clone().requires_grad_(True)
+ global_quats = best_params['global_quats'].clone().requires_grad_(True)
+ global_trans = best_params['global_trans'].clone().requires_grad_(True)
+
+ optimizer, scheduler = self._setup_optimizer_and_scheduler(
+ quats_to_optimize, global_quats, global_trans, n_iters
+ )
+ patience = 0
+ self.logger.info(f'Adaptive reset at iteration {iteration} with best loss: {best_loss:.6f}')
+ else:
+ patience += 1
+
+ pbar.update(1)
+
+ pbar.close()
+
+ # Prepare final results
+ quats_final = torch.cat([fixed_quat_0, best_params['quats']], dim=0)
+
+ # Final normalization
+ reshaped = quats_final.reshape(-1, 4)
+ norm = torch.norm(reshaped, dim=1, keepdim=True).clamp_min(1e-8)
+ quats_final = (reshaped / norm).reshape(batch_size, num_joints, 4)
+
+ global_quats_final = torch.cat([fixed_global_quat_0, best_params['global_quats']], dim=0)
+ global_trans_final = torch.cat([fixed_global_trans_0, best_params['global_trans']], dim=0)
+ global_quats_final = global_quats_final / torch.norm(
+ global_quats_final, dim=-1, keepdim=True
+ ).clamp_min(1e-8)
+
+ return quats_final, global_quats_final, global_trans_final
+
+def load_and_prepare_data(args):
+ """Load and prepare all necessary data for optimization."""
+
+ # Define paths
+ base_path = f'{args.input_path}/{args.seq_name}'
+ mesh_path = f'{base_path}/objs/mesh.obj'
+ rig_path = f'{base_path}/objs/rig.txt'
+ img_path = f'{base_path}/imgs'
+ flow_dirs = f'{base_path}/flow'
+
+ # Load model
+ model = load_model_from_obj_and_rig(mesh_path, rig_path, device=args.device)
+
+ # Load images
+ img_files = sorted(glob.glob(os.path.join(img_path, "*.png")))
+ images = []
+ for f in img_files:
+ img = Image.open(f).convert("RGBA")
+ arr = np.array(img, dtype=np.float32) / 255.0
+ t = torch.from_numpy(arr).to(args.device)
+ images.append(t)
+
+ images_batch = torch.stack(images, dim=0)
+
+ return model, images_batch, flow_dirs, img_path
+
+def setup_renderers(args):
+ """Setup multiple renderers for different camera views."""
+
+ available_views = [
+ "front", "back", "left", "right",
+ "front_left", "front_right", "back_left", "back_right"
+ ]
+
+ if args.main_renderer not in available_views:
+ raise ValueError(f"Main renderer '{args.main_renderer}' not found in available cameras: {available_views}")
+
+    with open(f"utils/cameras/{args.main_renderer}.json") as f:
+        main_cam_config = json.load(f)
+ main_renderer = MeshRenderer3D(args.device, image_size=args.img_size, cam_params=main_cam_config)
+
+ additional_views = [view.strip() for view in args.additional_renderers.split(',') if view.strip()]
+ if len(additional_views) > 3:
+ print(f"Warning: Only first 3 additional renderers will be used. Got: {additional_views}")
+ additional_views = additional_views[:3]
+
+ additional_renderers = {}
+ for view_name in additional_views:
+ if view_name in available_views and view_name != args.main_renderer:
+ cam_config = json.load(open(f"utils/cameras/{view_name}.json"))
+ renderer = MeshRenderer3D(args.device, image_size=args.img_size, cam_params=cam_config)
+ additional_renderers[f"{view_name}_renderer"] = renderer
+ elif view_name == args.main_renderer:
+ print(f"Warning: '{view_name}' is already the main renderer, skipping...")
+ elif view_name not in available_views:
+ print(f"Warning: Camera view '{view_name}' not found, skipping...")
+
+ return main_renderer, additional_renderers
+
+def get_parser():
+ """Create argument parser with all configuration options."""
+
+ parser = argparse.ArgumentParser(description="3D Rigging Optimization")
+
+ # Training parameters
+ training_group = parser.add_argument_group('Training')
+ training_group.add_argument("--iter", type=int, default=500, help="Number of training iterations")
+ training_group.add_argument("--img_size", type=int, default=512, help="Image resolution")
+ training_group.add_argument("--device", type=str, default="cuda:0", help="Device to use")
+ training_group.add_argument("--img_fps", type=int, default=15, help="Image frame rate")
+ training_group.add_argument('--main_renderer', type=str, default='front', help='Main renderer camera view (default: front)')
+ training_group.add_argument('--additional_renderers', type=str, default="back, right, left", help='Additional renderer views (max 3), comma-separated (e.g., "back,left,right"). ')
+
+ # Learning rates
+ lr_group = parser.add_argument_group('Learning Rates')
+ lr_group.add_argument("--lr", type=float, default=2e-3, help="Base learning rate")
+ lr_group.add_argument("--min_lr", type=float, default=1e-5, help="Minimum learning rate")
+ lr_group.add_argument("--warm_lr", type=float, default=1e-5, help="Warmup learning rate")
+
+ # Loss weights
+ loss_group = parser.add_argument_group('Loss Weights')
+ loss_group.add_argument("--smooth_weight", type=float, default=0.2)
+ loss_group.add_argument("--root_smooth_weight", type=float, default=1.0)
+ loss_group.add_argument("--coherence_weight", type=float, default=10)
+ loss_group.add_argument("--rgb_wt", type=float, default=1.0, help="RGB loss target ratio (relative importance)")
+ loss_group.add_argument("--mask_wt", type=float, default=1.0, help="Mask loss target ratio")
+ loss_group.add_argument("--proj_joint_wt", type=float, default=1.5, help="Joint projection loss target ratio")
+ loss_group.add_argument("--proj_vert_wt", type=float, default=3.0, help="Point projection loss target ratio")
+ loss_group.add_argument("--depth_wt", type=float, default=0.8, help="Depth loss target ratio")
+ loss_group.add_argument("--flow_wt", type=float, default=0.8, help="Flow loss target ratio")
+
+ # Data and output
+ data_group = parser.add_argument_group('Data and Output')
+ data_group.add_argument("--input_path", type=str, default="inputs")
+ data_group.add_argument("--save_path", type=str, default="results")
+ data_group.add_argument("--save_name", type=str, default="results")
+ data_group.add_argument("--seq_name", type=str, default=None)
+
+ # Flags
+ flag_group = parser.add_argument_group('Flags')
+    flag_group.add_argument('--gauss_filter', action='store_true', default=False)
+
+    return parser
+
+def main():
+ parser = get_parser()
+ args = parser.parse_args()
+
+ # Setup output directory
+ out_dir = f'{args.save_path}/{args.seq_name}/{args.save_name}'
+ save_args(args, out_dir)
+
+ # Initialize optimizer
+ ani_optimizer = AnimationOptimizer(args, device=args.device)
+
+ # Setup renderers
+ renderer, additional_renderers = setup_renderers(args)
+
+ # Load and prepare data
+ model, images_batch, flow_dirs, img_path = load_and_prepare_data(args)
+
+ # Setup tracking
+ joint_vis_mask = visualize_joints_on_mesh(model, renderer, args.seq_name, out_dir=out_dir)
+ joint_vis_mask = torch.from_numpy(joint_vis_mask).float().to(args.device)
+
+ joint_project_2d = renderer.project_points(model.joints_rest)
+
+ # Setup track paths
+ track_2d_path = img_path.replace('imgs', 'track_2d_joints')
+ os.makedirs(track_2d_path, exist_ok=True)
+
+ # Load or generate tracks
+ if not os.listdir(track_2d_path):
+ print("Generating joint tracks")
+ tracked_joints_2d = save_track(args.seq_name, joint_project_2d, img_path, track_2d_path, out_dir)
+ else:
+ print("Loading existing joint tracks")
+ tracked_joints_2d = np.load(f'{track_2d_path}/pred_tracks.npy')
+
+ # Setup point tracking
+ vert_vis_mask = visualize_points_on_mesh(model, renderer, args.seq_name, out_dir=out_dir)
+ vert_vis_mask = torch.from_numpy(vert_vis_mask).float().to(args.device)
+
+ track_verts_2d, track_indices, sampled_vertex_indices = save_track_points(
+ vert_vis_mask, renderer, model, img_path, out_dir, args
+ )
+ vert_vis_mask = vert_vis_mask[sampled_vertex_indices]
+
+ # Run optimization
+    print("Starting optimization")
+ final_quats, root_quats, root_pos = ani_optimizer.optimization(
+ images_batch=images_batch,
+ model=model,
+ renderer=renderer,
+ tracked_joints_2d=tracked_joints_2d,
+ joint_vis_mask=joint_vis_mask,
+ track_verts_2d=track_verts_2d,
+ vert_vis_mask=vert_vis_mask,
+ sampled_vertex_indices=sampled_vertex_indices,
+ track_indices=track_indices,
+ flow_dirs=flow_dirs,
+ n_iters=args.iter,
+ out_dir=out_dir
+ )
+
+ # Save results
+ save_and_smooth_results(
+ args, model, renderer, final_quats, root_quats, root_pos,
+ out_dir, additional_renderers, fps=10
+ )
+
+ print("Optimization completed successfully")
+ save_final_video(args)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/renderer.py b/third_party/Puppeteer/animation/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c07f3b2d8faa7007fa6b5ac6277300123dba62c
--- /dev/null
+++ b/third_party/Puppeteer/animation/renderer.py
@@ -0,0 +1,348 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import torch
+import cv2
+
+from pytorch3d.structures import join_meshes_as_scene, join_meshes_as_batch, Meshes
+from pytorch3d.renderer import (
+ FoVPerspectiveCameras, look_at_view_transform,
+ RasterizationSettings, MeshRenderer, MeshRasterizer,
+ SoftPhongShader, PointLights, BlendParams, SoftSilhouetteShader
+)
+from utils.loss_utils import compute_visibility_mask_igl
+
+def create_camera_from_blender_params(cam_params, device):
+ """
+ Convert Blender camera parameters to PyTorch3D camera
+
+ Args:
+ cam_params (dict): Camera parameters from Blender JSON
+ device: Device to create camera on
+
+ Returns:
+ FoVPerspectiveCameras: Converted camera
+ """
+ # Extract matrix world and convert to rotation and translation
+ matrix_world = torch.tensor(cam_params['matrix_world'], dtype=torch.float32)
+
+ # Extract field of view (use x_fov, assuming symmetric FOV)
+ fov = cam_params['x_fov'] * 180 / np.pi # Convert radians to degrees
+
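+    # Axis permutation mapping the Blender Z-up world frame to a Y-up frame; the
+    # adjusted matrix is inverted below to obtain the world-to-camera transform,
+    # then sign-flipped to match the renderer's camera axis conventions.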
+ rotation_matrix = torch.tensor([
+ [1, 0, 0, 0],
+ [0, 0, 1, 0],
+ [0, -1, 0, 0],
+ [0, 0, 0, 1]
+ ], dtype=torch.float32)
+
+ # Apply transformations
+ adjusted_matrix = rotation_matrix @ matrix_world
+ world2cam_matrix_tensor = torch.linalg.inv(adjusted_matrix)
+
+ aligned_matrix = torch.tensor([
+ [-1.0, 0.0, 0.0, 0.0],
+ [0.0, 1.0, 0.0, 0.0],
+ [0.0, 0.0, -1.0, 0.0],
+ [0.0, 0.0, 0.0, 1.0]
+ ], dtype=torch.float32, device=device)
+ world2cam_matrix = aligned_matrix @ world2cam_matrix_tensor.to(device)
+ cam2world_matrix = torch.linalg.inv(world2cam_matrix)
+
+ # Extract rotation and translation
+ R = cam2world_matrix[:3, :3]
+ T = torch.tensor([
+ world2cam_matrix[0, 3],
+ world2cam_matrix[1, 3],
+ world2cam_matrix[2, 3]
+ ], device=device, dtype=torch.float32)
+
+ return FoVPerspectiveCameras(
+ device=device,
+ fov=fov,
+ R=R[None],
+ T=T[None],
+ znear=0.1,
+ zfar=100.0
+ )
+
+class MeshRenderer3D:
+ """
+ PyTorch3D mesh renderer with support for various rendering modes.
+
+ Features:
+ - Standard mesh rendering with Phong shading
+ - Silhouette rendering
+ - Multi-frame batch rendering
+ - Point projection with visibility computation
+ """
+ def __init__(self, device, image_size=1024, cam_params=None, light_params=None, raster_params=None):
+ self.device = device
+ # Initialize camera
+ self.camera = self._setup_camera(cam_params)
+
+ # Initialize light
+ self.light = self._setup_light(light_params)
+
+ # Initialize rasterization settings
+ self.raster_settings = self._setup_raster_settings(raster_params, image_size)
+ self.camera.image_size = self.raster_settings.image_size
+
+ # Initialize renderers
+ self._setup_renderers()
+
+ def _setup_camera(self, cam_params):
+ """Setup camera based on parameters."""
+ if cam_params is None:
+ # Default camera
+ R, T = look_at_view_transform(3.0, 30, 20, at=[[0.0, 1.0, 0.0]])
+ return FoVPerspectiveCameras(device=self.device, R=R, T=T)
+
+ # Check if Blender parameters
+ if "matrix_world" in cam_params and "x_fov" in cam_params:
+ return create_camera_from_blender_params(cam_params, self.device)
+ else:
+            raise ValueError("cam_params must contain Blender-style 'matrix_world' and 'x_fov' entries.")
+
+ def _setup_light(self, light_params):
+ """Setup light source."""
+ if light_params is None:
+ return PointLights(device=self.device, location=[[0.0, 0.0, 3.0]])
+
+ location = [[
+ light_params.get('light_x', 0.0),
+ light_params.get('light_y', 0.0),
+ light_params.get('light_z', 3.0)
+ ]]
+ return PointLights(device=self.device, location=location)
+
+ def _setup_raster_settings(self, raster_params, default_size):
+ """Setup rasterization settings."""
+ if raster_params is None:
+ raster_params = {
+ "image_size": [default_size, default_size],
+ "blur_radius": 0.0,
+ "faces_per_pixel": 1,
+ "bin_size": 0,
+ "cull_backfaces": False
+ }
+
+ return RasterizationSettings(**raster_params)
+
+ def _setup_renderers(self) -> None:
+ """Initialize main and silhouette renderers."""
+ rasterizer = MeshRasterizer(
+ cameras=self.camera,
+ raster_settings=self.raster_settings
+ )
+
+ # Main renderer with Phong shading
+ self.renderer = MeshRenderer(
+ rasterizer=rasterizer,
+ shader=SoftPhongShader(
+ device=self.device,
+ cameras=self.camera,
+ lights=self.light
+ )
+ )
+
+ # Silhouette renderer
+ blend_params = BlendParams(
+ sigma=1e-4,
+ gamma=1e-4,
+ background_color=(0.0, 0.0, 0.0)
+ )
+
+ self.silhouette_renderer = MeshRenderer(
+ rasterizer=rasterizer,
+ shader=SoftSilhouetteShader(blend_params=blend_params)
+ )
+
+ def render(self, meshes):
+ """
+ Render meshes with Phong shading.
+
+ Args:
+ meshes: Single mesh or list of meshes
+
+ Returns:
+ Rendered images tensor of shape (1, H, W, C)
+ """
+ scene_mesh = self._prepare_scene_mesh(meshes)
+ return self.renderer(scene_mesh)
+
+ def render_batch(self, mesh_list):
+ """
+ Render multiple frames as a batch.
+
+ Args:
+ mesh_list: List of mesh lists (one per frame)
+
+ Returns:
+ Batch of rendered images of shape (B, H, W, C)
+ """
+ assert isinstance(mesh_list, list)
+
+ batch_meshes = []
+ for frame_meshes in mesh_list:
+ scene_mesh = self._prepare_scene_mesh(frame_meshes)
+ batch_meshes.append(scene_mesh)
+
+ batch_mesh = join_meshes_as_batch(batch_meshes)
+ return self.renderer(batch_mesh)
+
+ def get_rasterization_fragments(self, mesh_list):
+ """
+ Get rasterization fragments for batch of meshes.
+
+ Args:
+ mesh_list: List of mesh lists (one per frame)
+
+ Returns:
+ Rasterization fragments
+ """
+ assert isinstance(mesh_list, list)
+
+ batch_meshes = []
+ for frame_meshes in mesh_list:
+ scene_mesh = self._prepare_scene_mesh(frame_meshes)
+ batch_meshes.append(scene_mesh)
+
+ batch_mesh = join_meshes_as_batch(batch_meshes)
+ return self.renderer.rasterizer(batch_mesh)
+
+ def render_silhouette_batch(self, mesh_list):
+ """
+ Render silhouette masks for multiple frames.
+
+ Args:
+ mesh_list: List of mesh lists (one per frame)
+
+ Returns:
+ Batch of silhouette masks of shape (B, H, W, 1)
+ """
+ assert isinstance(mesh_list, list)
+
+ batch_meshes = []
+ for frame_meshes in mesh_list:
+ scene_mesh = self._prepare_scene_mesh(frame_meshes)
+ batch_meshes.append(scene_mesh)
+
+ batch_mesh = join_meshes_as_batch(batch_meshes)
+ silhouette = self.silhouette_renderer(batch_mesh)
+ return silhouette[..., 3:] # Return alpha channel
+
+ def tensor_to_image(self, tensor):
+ """
+ Convert rendered tensor to numpy image array.
+
+ Args:
+ tensor: Rendered tensor of shape (B, H, W, C)
+
+ Returns:
+ Numpy array of shape (H, W, 3) with values in [0, 255]
+ """
+ return (tensor[0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
+
+ def project_points(self, points_3d):
+ """
+ Project 3D joints/vertices to 2D image plane
+
+ Args:
+ points_3d: shape (N, 3) or (B, N, 3) tensor of 3D points
+
+ Returns:
+ points_2d: shape (N, 2) or (B, N, 2) tensor of 2D projected points
+ """
+ if not torch.is_tensor(points_3d):
+ points_3d = torch.tensor(points_3d, device=self.device, dtype=torch.float32)
+
+ if len(points_3d.shape) == 2:
+ points_3d = points_3d.unsqueeze(0) # (1, N, 3)
+
+ # project points
+ projected = self.camera.transform_points_screen(points_3d, image_size=self.raster_settings.image_size)
+
+ if projected.shape[0] == 1:
+ projected_points = projected.squeeze(0)[:, :2]
+ else:
+ projected_points = projected[:, :, :2]
+ return projected_points
+
+ def render_with_points(self, meshes, points_3d, point_radius=3, for_vertices=False):
+ """
+ render the mesh and visualize the joints/vertices on the image
+
+ Args:
+ meshes: mesh or list of meshes to be rendered
+ points_3d: shape (N, 3) tensor of 3D joints/vertices
+ point_radius: radius of the drawn points
+ for_vertices: if True, compute visibility for vertices, else for joints
+
+ Returns:
+ Image with joints/vertices drawn, visibility mask
+ """
+ rendered_image = self.render(meshes)
+
+ # project 3D points to 2D
+ points_2d = self.project_points(points_3d)
+
+ image_np = rendered_image[0, ..., :3].cpu().numpy()
+ image_with_points = image_np.copy()
+ height, width = image_np.shape[:2]
+
+ ray_origins = self.camera.get_camera_center() # (B, 3)
+ ray_origins = np.tile(ray_origins.detach().cpu().numpy(), (points_3d.shape[0], 1))
+
+ verts = meshes.verts_packed().detach().cpu().numpy()
+ faces = meshes.faces_packed().detach().cpu().numpy()
+
+ ray_dirs = points_3d.detach().cpu().numpy() - ray_origins # calculate ray directions
+ distances = np.linalg.norm(ray_dirs, axis=1) # distances from camera to points
+ ray_dirs = (ray_dirs.T / distances).T # normalize to unit vectors
+
+ vis_mask = compute_visibility_mask_igl(ray_origins, ray_dirs, distances, verts, faces, distance_tolerance=1e-6, for_vertices=for_vertices)
+
+ # draw points
+ visible_color=(1, 0, 0) # visible points are red
+ invisible_color=(0, 0, 1) # invisible points are blue
+ for i, point in enumerate(points_2d):
+ x, y = int(point[0].item()), int(point[1].item())
+
+ if 0 <= x < width and 0 <= y < height:
+ point_color = visible_color if vis_mask[i] else invisible_color
+ cv2.circle(image_with_points, (x, y), point_radius, point_color, -1)
+
+ result = torch.from_numpy(image_with_points).to(self.device)
+ result = result.unsqueeze(0)
+
+ if rendered_image.shape[-1] == 4:
+ alpha = rendered_image[..., 3:]
+ result = torch.cat([result, alpha], dim=-1)
+
+ return result, vis_mask
+
+ def _prepare_scene_mesh(self, meshes):
+ """Convert meshes to a single scene mesh."""
+ if isinstance(meshes, Meshes):
+ return meshes
+ elif isinstance(meshes, list):
+ return join_meshes_as_scene(meshes)
+ else:
+ raise ValueError("meshes must be Meshes object or list of Meshes")
+
+
+
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/cameras/back.json b/third_party/Puppeteer/animation/utils/cameras/back.json
new file mode 100644
index 0000000000000000000000000000000000000000..8ab578de6c7a2cbddb51e72be37ae8743dd0e6c5
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/cameras/back.json
@@ -0,0 +1 @@
+{"matrix_world": [[-1.0, -8.742277657347586e-08, -8.742277657347586e-08, 0.0], [-8.742277657347586e-08, 4.371138118131057e-08, 1.0, 2.0], [-8.74227694680485e-08, 1.0, -4.371138828673793e-08, 0.0], [0.0, 0.0, 0.0, 1.0]], "format_version": 6, "max_depth": 5.0, "bbox": [[-0.14632226526737213, -0.15228690207004547, -0.5013949275016785], [0.18149489164352417, 0.24675098061561584, 0.4873228073120117]], "origin": [0.0, 2.0, 0.0], "x_fov": 0.6911112070083618, "y_fov": 0.6911112070083618, "x": [-1.0, -8.742277657347586e-08, -8.74227694680485e-08], "y": [8.742277657347586e-08, -4.371138118131057e-08, -1.0], "z": [8.742277657347586e-08, -1.0, 4.371138828673793e-08]}
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/cameras/back_left.json b/third_party/Puppeteer/animation/utils/cameras/back_left.json
new file mode 100644
index 0000000000000000000000000000000000000000..27ca42362240c267872335fe88826089cde101d8
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/cameras/back_left.json
@@ -0,0 +1,64 @@
+{
+ "matrix_world": [
+ [
+ -0.7071067235853873,
+ -1.5015359289272112e-08,
+ -0.7071068387877032,
+ -1.4142136775754064
+ ],
+ [
+ -0.7071068387877031,
+ -1.1886282763606815e-08,
+ 0.7071067235853874,
+ 1.4142134471707748
+ ],
+ [
+ -1.9022333375140477e-08,
+ 1.0,
+ -2.2125928034189e-09,
+ -4.4251856068378e-09
+ ],
+ [
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0
+ ]
+ ],
+ "format_version": 6,
+ "max_depth": 5.0,
+ "bbox": [
+ [
+ -0.14632226526737213,
+ -0.15228690207004547,
+ -0.5013949275016785
+ ],
+ [
+ 0.18149489164352417,
+ 0.24675098061561584,
+ 0.4873228073120117
+ ]
+ ],
+ "origin": [
+ -1.0,
+ 1.0,
+ 0.0
+ ],
+ "x_fov": 0.6911112070083618,
+ "y_fov": 0.6911112070083618,
+ "x": [
+ -0.7071067235853873,
+ -1.5015359289272112e-08,
+ -0.7071068387877032
+ ],
+ "y": [
+ -0.7071068387877031,
+ -1.1886282763606815e-08,
+ 0.7071067235853874
+ ],
+ "z": [
+ -1.9022333375140477e-08,
+ 1.0,
+ -2.2125928034189e-09
+ ]
+}
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/cameras/back_right.json b/third_party/Puppeteer/animation/utils/cameras/back_right.json
new file mode 100644
index 0000000000000000000000000000000000000000..8bf6839949f981c9eb503fea70209df93f561a2a
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/cameras/back_right.json
@@ -0,0 +1,64 @@
+{
+ "matrix_world": [
+ [
+ -0.7071067854026265,
+ -7.240741267677819e-08,
+ 0.7071067769704649,
+ 1.4142135539409297
+ ],
+ [
+ 0.7071067769704653,
+ 2.4325415404202744e-08,
+ 0.7071067854026294,
+ 1.4142135708052588
+ ],
+ [
+ -6.840043892397674e-08,
+ 0.9999999999999971,
+ 3.399910591950217e-08,
+ 6.799821183900434e-08
+ ],
+ [
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0
+ ]
+ ],
+ "format_version": 6,
+ "max_depth": 5.0,
+ "bbox": [
+ [
+ -0.14632226526737213,
+ -0.15228690207004547,
+ -0.5013949275016785
+ ],
+ [
+ 0.18149489164352417,
+ 0.24675098061561584,
+ 0.4873228073120117
+ ]
+ ],
+ "origin": [
+ 1.0,
+ 1.0,
+ 0.0
+ ],
+ "x_fov": 0.6911112070083618,
+ "y_fov": 0.6911112070083618,
+ "x": [
+ -0.7071067854026265,
+ -7.240741267677819e-08,
+ 0.7071067769704649
+ ],
+ "y": [
+ 0.7071067769704653,
+ 2.4325415404202744e-08,
+ 0.7071067854026294
+ ],
+ "z": [
+ -6.840043892397674e-08,
+ 0.9999999999999971,
+ 3.399910591950217e-08
+ ]
+}
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/cameras/front.json b/third_party/Puppeteer/animation/utils/cameras/front.json
new file mode 100644
index 0000000000000000000000000000000000000000..4b0cd0cf0bd4c497c19d066b26f90d7328f1884a
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/cameras/front.json
@@ -0,0 +1 @@
+{"matrix_world": [[1.0, 0.0, 0.0, 0.0], [0.0, -4.371138828673793e-08, -1.0, -2.0], [0.0, 1.0, -4.371138828673793e-08, 0.0], [0.0, 0.0, 0.0, 1.0]], "format_version": 6, "max_depth": 5.0, "bbox": [[-0.14632226526737213, -0.15228690207004547, -0.5013949275016785], [0.18149489164352417, 0.24675098061561584, 0.4873228073120117]], "origin": [0.0, -2.0, 0.0], "x_fov": 0.6911112070083618, "y_fov": 0.6911112070083618, "x": [1.0, 0.0, 0.0], "y": [-0.0, 4.371138828673793e-08, -1.0], "z": [-0.0, 1.0, 4.371138828673793e-08]}
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/cameras/front_left.json b/third_party/Puppeteer/animation/utils/cameras/front_left.json
new file mode 100644
index 0000000000000000000000000000000000000000..bd49fc349cb64448d52ac1707a18ba6e5579a9f6
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/cameras/front_left.json
@@ -0,0 +1,64 @@
+{
+ "matrix_world": [
+ [
+ 0.7071068078790848,
+ 2.869602372390645e-08,
+ -0.7071067544940086,
+ -1.4142135089880172
+ ],
+ [
+ -0.7071067544940088,
+ -6.21956508517485e-09,
+ -0.7071068078790852,
+ -1.4142136157581704
+ ],
+ [
+ -2.468905024866075e-08,
+ 0.9999999999999996,
+ 1.589325537842967e-08,
+ 3.178651075685934e-08
+ ],
+ [
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0
+ ]
+ ],
+ "format_version": 6,
+ "max_depth": 5.0,
+ "bbox": [
+ [
+ -0.14632226526737213,
+ -0.15228690207004547,
+ -0.5013949275016785
+ ],
+ [
+ 0.18149489164352417,
+ 0.24675098061561584,
+ 0.4873228073120117
+ ]
+ ],
+ "origin": [
+ -1.0,
+ -1.0,
+ 0.0
+ ],
+ "x_fov": 0.6911112070083618,
+ "y_fov": 0.6911112070083618,
+ "x": [
+ 0.7071068078790848,
+ 2.869602372390645e-08,
+ -0.7071067544940086
+ ],
+ "y": [
+ -0.7071067544940088,
+ -6.21956508517485e-09,
+ -0.7071068078790852
+ ],
+ "z": [
+ -2.468905024866075e-08,
+ 0.9999999999999996,
+ 1.589325537842967e-08
+ ]
+}
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/cameras/front_right.json b/third_party/Puppeteer/animation/utils/cameras/front_right.json
new file mode 100644
index 0000000000000000000000000000000000000000..9aab875dd260360db5d648065ac487861ea9f199
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/cameras/front_right.json
@@ -0,0 +1,64 @@
+{
+ "matrix_world": [
+ [
+ 0.7071068078790848,
+ -2.869602372390645e-08,
+ 0.7071067544940086,
+ 1.4142135089880172
+ ],
+ [
+ 0.7071067544940088,
+ -6.21956508517485e-09,
+ -0.7071068078790852,
+ -1.4142136157581704
+ ],
+ [
+ 2.468905024866075e-08,
+ 0.9999999999999996,
+ 1.589325537842967e-08,
+ 3.178651075685934e-08
+ ],
+ [
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0
+ ]
+ ],
+ "format_version": 6,
+ "max_depth": 5.0,
+ "bbox": [
+ [
+ -0.14632226526737213,
+ -0.15228690207004547,
+ -0.5013949275016785
+ ],
+ [
+ 0.18149489164352417,
+ 0.24675098061561584,
+ 0.4873228073120117
+ ]
+ ],
+ "origin": [
+ 1.0,
+ -1.0,
+ 0.0
+ ],
+ "x_fov": 0.6911112070083618,
+ "y_fov": 0.6911112070083618,
+ "x": [
+ 0.7071068078790848,
+ -2.869602372390645e-08,
+ 0.7071067544940086
+ ],
+ "y": [
+ 0.7071067544940088,
+ -6.21956508517485e-09,
+ -0.7071068078790852
+ ],
+ "z": [
+ 2.468905024866075e-08,
+ 0.9999999999999996,
+ 1.589325537842967e-08
+ ]
+}
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/cameras/left.json b/third_party/Puppeteer/animation/utils/cameras/left.json
new file mode 100644
index 0000000000000000000000000000000000000000..12aec0ea218289147fba1650a6b006fce7a7adae
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/cameras/left.json
@@ -0,0 +1 @@
+{"matrix_world": [[7.549790126404332e-08, 7.549790126404332e-08, -1.0, -2.0], [-1.0, 5.6999328827277325e-15, -7.549790126404332e-08, 0.0], [0.0, 1.0, 7.549790126404332e-08, 0.0], [0.0, 0.0, 0.0, 1.0]], "format_version": 6, "max_depth": 5.0, "bbox": [[-0.14632226526737213, -0.15228690207004547, -0.5013949275016785], [0.18149489164352417, 0.24675098061561584, 0.4873228073120117]], "origin": [-2.0, 0.0, 0.0], "x_fov": 0.6911112070083618, "y_fov": 0.6911112070083618, "x": [7.549790126404332e-08, -1.0, 0.0], "y": [-7.549790126404332e-08, -5.6999328827277325e-15, -1.0], "z": [1.0, 7.549790126404332e-08, -7.549790126404332e-08]}
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/cameras/right.json b/third_party/Puppeteer/animation/utils/cameras/right.json
new file mode 100644
index 0000000000000000000000000000000000000000..994caffe6ec53aaef5ef338ae5203c0b4d7d8061
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/cameras/right.json
@@ -0,0 +1 @@
+{"matrix_world": [[7.549790126404332e-08, -7.549790126404332e-08, 1.0, 2.0], [1.0, 5.6999328827277325e-15, -7.549790126404332e-08, 0.0], [0.0, 1.0, 7.549790126404332e-08, 0.0], [0.0, 0.0, 0.0, 1.0]], "format_version": 6, "max_depth": 5.0, "bbox": [[-0.14632226526737213, -0.15228690207004547, -0.5013949275016785], [0.18149489164352417, 0.24675098061561584, 0.4873228073120117]], "origin": [2.0, 0.0, 0.0], "x_fov": 0.6911112070083618, "y_fov": 0.6911112070083618, "x": [7.549790126404332e-08, 1.0, 0.0], "y": [7.549790126404332e-08, -5.6999328827277325e-15, -1.0], "z": [-1.0, 7.549790126404332e-08, -7.549790126404332e-08]}
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/data_loader.py b/third_party/Puppeteer/animation/utils/data_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a4d3c77ab24f9bde7e1c5b4ad87aaed07bd11cd
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/data_loader.py
@@ -0,0 +1,170 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+import torch
+import random
+from pytorch3d.io import load_objs_as_meshes, load_obj
+from pytorch3d.renderer import TexturesAtlas
+from pytorch3d.structures import Meshes
+from model import RiggingModel
+
+def prepare_depth(depth_path, input_frames, device, depth_model):
+ os.makedirs(depth_path, exist_ok=True)
+    depth_file = f"{depth_path}/depth_gt_raw.pt"
+    if os.path.exists(depth_file):
+        print("Loading cached GT depth...")
+        depth_gt_raw = torch.load(depth_file, map_location=device)
+    else:
+        print("Running VideoDepthAnything and caching the result.")
+        with torch.no_grad():
+            depth_gt_raw = depth_model.get_depth_maps(input_frames)
+            torch.save(depth_gt_raw.cpu(), depth_file)
+ depth_gt_raw = depth_gt_raw.to(device)
+ return depth_gt_raw
+
+def normalize_vertices(verts):
+ """Normalize vertices to a unit cube."""
+ vmin, vmax = verts.min(dim=0).values, verts.max(dim=0).values
+ center = (vmax + vmin) / 2.0
+ scale = (vmax - vmin).max()
+ verts_norm = (verts - center) / scale
+ return verts_norm, center, scale
+
+def build_atlas_texture(obj_path, atlas_size, device):
+ """Load OBJ + materials and bake all textures into a single atlas."""
+ verts, faces, aux = load_obj(
+ obj_path,
+ device=device,
+ load_textures=True,
+ create_texture_atlas=True,
+ texture_atlas_size=atlas_size,
+ texture_wrap="repeat",
+ )
+ atlas = aux.texture_atlas # (F, R, R, 3)
+ verts_norm, _, _ = normalize_vertices(verts)
+ mesh_atlas = Meshes(
+ verts=[verts_norm],
+ faces=[faces.verts_idx],
+ textures=TexturesAtlas(atlas=[atlas]),
+ )
+ return mesh_atlas
+
+def read_rig_file(file_path):
+ """
+ Read rig from txt file, our format is the same as RigNet:
+ joints joint_name x y z
+ root root_joint_name
+ skin vertex_idx joint_name weight joint_name weight ...
+ hier parent_joint_name child_joint_name
+ """
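+    # Illustrative example of the expected layout (names and values are arbitrary):
+    #   joints hip 0.0 0.5 0.0
+    #   joints spine 0.0 0.8 0.0
+    #   root hip
+    #   skin 0 hip 0.7 spine 0.3
+    #   hier hip spine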
+ joints = []
+ bones = []
+ joint_names = []
+
+ joint_mapping = {}
+ joint_index = 0
+
+ skinning_data = {} # Dictionary to store vertex index -> [(joint_idx, weight), ...]
+
+ with open(file_path, 'r') as file:
+ lines = file.readlines()
+
+ for line in lines:
+ parts = line.split()
+ if line.startswith('joints'):
+ name = parts[1]
+ position = [float(parts[2]), float(parts[3]), float(parts[4])]
+ joints.append(position)
+ joint_names.append(name)
+ joint_mapping[name] = joint_index
+ joint_index += 1
+ elif line.startswith('hier'):
+ parent_joint = joint_mapping[parts[1]]
+ child_joint = joint_mapping[parts[2]]
+ bones.append([parent_joint, child_joint])
+ elif line.startswith('root'):
+ root = joint_mapping[parts[1]]
+ elif line.startswith('skin'):
+ vertex_idx = int(parts[1])
+
+ if vertex_idx not in skinning_data:
+ skinning_data[vertex_idx] = []
+
+ for i in range(2, len(parts), 2):
+ if i+1 < len(parts):
+ joint_name = parts[i]
+ weight = float(parts[i+1])
+
+ if joint_name in joint_mapping:
+ joint_idx = joint_mapping[joint_name]
+ skinning_data[vertex_idx].append((joint_idx, weight))
+
+ return np.array(joints), np.array(bones), root, joint_names, skinning_data
+
+def load_model_from_obj_and_rig(
+ mesh_path: str,
+ rig_path: str,
+ device: str | torch.device = "cuda",
+ use_skin_color: bool = True,
+ atlas_size: int = 8,
+):
+ """Load a 3D model from OBJ and rig files."""
+
+ # 1) read raw mesh
+ raw_mesh = load_objs_as_meshes([mesh_path], device=device)
+ verts_raw = raw_mesh.verts_packed() # (V,3)
+ faces_idx = raw_mesh.faces_packed() # (F,3)
+
+ # 2) read rig data
+ joints_np, bones_np, root_idx, joint_names, skinning_data = read_rig_file(rig_path)
+ J = joints_np.shape[0]
+
+ # parent indices, default -1
+ parent_idx = [-1] * J
+ for p, c in bones_np:
+ parent_idx[c] = p
+
+ verts_norm, center, scale = normalize_vertices(verts_raw)
+ joints_t = torch.as_tensor(joints_np, dtype=torch.float32, device=device)
+ joints_norm = (joints_t - center) / scale
+
+ # skin weights tensor (V,J)
+ V = verts_raw.shape[0]
+ skin_weights = torch.zeros(V, J, dtype=torch.float32, device=device)
+ for v_idx, lst in skinning_data.items():
+ for j_idx, w in lst:
+ skin_weights[v_idx, j_idx] = w
+
+ # 3) texture strategy
+ mesh_norm = build_atlas_texture(mesh_path, atlas_size, device)
+ tex = mesh_norm.textures
+
+ # 4) pack into Model class
+ model = RiggingModel(device=device)
+ model.vertices = [mesh_norm.verts_packed()]
+ model.faces = [faces_idx]
+ model.textures = [tex]
+
+ # rig meta
+ model.bones = bones_np # (B,2)
+ model.parent_indices = parent_idx
+ model.root_index = root_idx
+ model.skin_weights = [skin_weights]
+
+ model.bind_matrices_inv = torch.eye(4, device=device).unsqueeze(0).expand(J, -1, -1).contiguous()
+ model.joints_rest = joints_norm
+
+ return model
diff --git a/third_party/Puppeteer/animation/utils/loss_utils.py b/third_party/Puppeteer/animation/utils/loss_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c1fa7378bbcefa37c06db394df972527846e8e0
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/loss_utils.py
@@ -0,0 +1,420 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from third_partys.Video_Depth_Anything.video_depth_anything.video_depth import VideoDepthAnything
+import torch
+import torch.nn as nn
+import numpy as np
+import igl
+            with open(f"utils/cameras/{view_name}.json") as f:
+                cam_config = json.load(f)
+import time
+import torch.nn.functional as F
+from utils.quat_utils import quat_inverse, quat_log, quat_multiply, normalize_quaternion
+from pytorch3d.structures import join_meshes_as_scene, join_meshes_as_batch
+import os
+from pathlib import Path
+
+class DepthModule:
+ def __init__(self, encoder='vitl', device='cuda', input_size=518, fp32=False):
+ """
+ Initialize the depth loss module with Video Depth Anything
+
+ Args:
+ encoder: 'vitl' or 'vits'
+ device: device to run the model on
+ input_size: input size for the model
+ fp32: whether to use float32 for inference
+ """
+        self.device = device if torch.cuda.is_available() else 'cpu'
+ self.input_size = input_size
+ self.fp32 = fp32
+
+ # Initialize model configuration
+ model_configs = {
+ 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
+ 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
+ }
+
+ # Load Video Depth Anything model
+ self.video_depth_model = VideoDepthAnything(**model_configs[encoder])
+ self.video_depth_model.load_state_dict(
+ torch.load(f'./third_partys/Video_Depth_Anything/ckpt/video_depth_anything_{encoder}.pth', map_location='cpu'),
+ strict=True
+ )
+ self.video_depth_model = self.video_depth_model.to(self.device).eval()
+ for param in self.video_depth_model.parameters():
+ param.requires_grad = False
+
+ def get_depth_maps(self, frames, target_fps=30):
+ """
+ Get depth maps for video frames
+ """
+ depths, _ = self.video_depth_model.infer_video_depth(
+ frames,
+ target_fps,
+ input_size=self.input_size,
+ device=self.device,
+ fp32=self.fp32
+ )
+ return depths
+
+def save_depth_as_images(depth_np, output_dir='./depth_images'):
+ """save depth images"""
+ os.makedirs(output_dir, exist_ok=True)
+
+ for i, depth_map in enumerate(depth_np):
+ depth_map = depth_map.detach().cpu().numpy()
+ valid_mask = (depth_map > 0)
+ if not valid_mask.any():
+ continue
+
+ valid_min = depth_map[valid_mask].min()
+ valid_max = depth_map[valid_mask].max()
+
+ normalized = np.zeros_like(depth_map)
+ normalized[valid_mask] = 255.0 * (depth_map[valid_mask] - valid_min) / (valid_max - valid_min)
+
+ depth_img = normalized.astype(np.uint8)
+
+ cv2.imwrite(os.path.join(output_dir, f'depth_{i:04d}.png'), depth_img)
+
+ print(f"Save {len(depth_np)} depth images to {output_dir}")
+
+def compute_visibility_mask_igl(ray_origins, ray_dirs, distances, verts, faces, distance_tolerance=1e-6, for_vertices=False):
+ """
+ Compute visibility mask using IGL ray-mesh intersection.
+ """
+ num_rays = ray_origins.shape[0]
+ visibility_mask = np.ones(num_rays, dtype=bool)
+
+ for i in range(num_rays):
+ ray_origin = ray_origins[i].reshape(1, 3)
+ ray_dir = ray_dirs[i].reshape(1, 3)
+ intersections = igl.ray_mesh_intersect(ray_origin, ray_dir, verts, faces)
+ if intersections:
+ # Count intersections that occur before the target point
+ count = sum(1 for h in intersections if h[4] < distances[i] - distance_tolerance)
+ # count=0 → ray completely missed the mesh; count=1 → ray stops exactly at the face containing the joint
+ # count>1 → ray was blocked by other faces along the way
+ if for_vertices:
+ if count != 1:
+ visibility_mask[i] = False
+ else: # for joints
+ if count > 2:
+ visibility_mask[i] = False
+
+ return visibility_mask
+
+def compute_reprojection_loss(renderer, vis_mask, predicted_joints, tracked_joints_2d, image_size):
+ """
+ Compute reprojection loss between predicted 3D points and tracked 2D points.
+ """
+ if predicted_joints.dim() != 3:
+ raise ValueError(f"predicted_joints must be 3D tensor, got shape {predicted_joints.shape}")
+
+ B, J, _ = predicted_joints.shape
+ device = predicted_joints.device
+
+ # Project 3D joints to 2D screen coordinates
+ projected = renderer.camera.transform_points_screen(
+ predicted_joints,
+ image_size=[image_size, image_size]
+ )
+ projected_2d = projected[..., :2] # (B, J, 2)
+
+ # Convert and validate tracked joints
+ if not isinstance(tracked_joints_2d, torch.Tensor):
+ tracked_joints_2d = torch.from_numpy(tracked_joints_2d).float()
+ tracked_joints_2d = tracked_joints_2d.to(device)
+
+ if tracked_joints_2d.dim() == 2:
+ tracked_joints_2d = tracked_joints_2d.unsqueeze(0).expand(B, -1, -1)
+
+ vis_mask = vis_mask.to(device).float()
+
+ num_visible = vis_mask.sum()
+ if num_visible == 0:
+ # No visible joints - return zero loss
+ return torch.tensor(0.0, device=device, requires_grad=True)
+
+ squared_diff = (projected_2d - tracked_joints_2d).pow(2).sum(dim=-1) # (B, J)
+
+ vis_mask_expanded = vis_mask.unsqueeze(0) # (1, J)
+ masked_loss = squared_diff * vis_mask_expanded # (B, J)
+ per_frame_loss = masked_loss.sum(dim=1) / num_visible # (B,)
+ final_loss = per_frame_loss.mean() # scalar
+
+ return final_loss
+
+def geodesic_loss(q1, q2, eps=1e-6):
+ """
+ Compute geodesic distance loss between batches of quaternions for rot smooth loss.
+ """
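+    # Geodesic angle between unit quaternions: theta = 2 * arccos(|<q1, q2>|).
+    # The hemisphere correction below treats q and -q (the same rotation) as
+    # identical, so antipodal representations yield zero distance.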
+ q1_norm = normalize_quaternion(q1, eps=eps)
+ q2_norm = normalize_quaternion(q2, eps=eps)
+
+ dot_product = (q1_norm * q2_norm).sum(dim=-1, keepdim=True)
+ q2_corrected = torch.where(dot_product < 0, -q2_norm, q2_norm)
+ inner_product = (q1_norm * q2_corrected).sum(dim=-1)
+
+ # Clamp to valid range for arccos to avoid numerical issues
+ inner_product_clamped = torch.clamp(inner_product, min=-1.0 + eps, max=1.0 - eps)
+ theta = 2.0 * torch.arccos(torch.abs(inner_product_clamped))
+
+ return theta
+
+def root_motion_reg(root_quats, root_pos):
+    """Return (translation smoothness, rotation smoothness) penalties for the root trajectory."""
+    pos_smooth = ((root_pos[1:] - root_pos[:-1]) ** 2).mean()
+    quat_smooth = (geodesic_loss(root_quats[1:], root_quats[:-1]) ** 2).mean()
+    return pos_smooth, quat_smooth
+
+def joint_motion_coherence(quats_normed, parent_idx):
+ """
+ Compute joint motion coherence loss to enforce smooth relative motion between parent-child joints.
+ """
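+    # For each non-root joint, express its rotation in the parent's local frame and
+    # penalize frame-to-frame changes; differencing quaternion components is a
+    # first-order proxy for the relative angular velocity.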
+ coherence_loss = 0
+
+ for j, parent in enumerate(parent_idx):
+ if parent != -1: # Skip root joint
+ parent_rot = quats_normed[:, parent] # (T, 4)
+ child_rot = quats_normed[:, j] # (T, 4)
+
+ # Compute relative rotation of child w.r.t. parent's local frame
+ # local_rot = parent_rot^(-1) * child_rot
+ local_rot = quat_multiply(quat_inverse(parent_rot), child_rot)
+ local_rot_velocity = local_rot[1:] - local_rot[:-1] # (T-1, 4)
+
+ coherence_loss += local_rot_velocity.pow(2).mean()
+
+ return coherence_loss
+
+def read_flo_file(file_path):
+ """
+ Read optical flow from .flo format file.
+ """
+ with open(file_path, 'rb') as f:
+ magic = np.fromfile(f, np.float32, count=1)
+ if len(magic) == 0 or magic[0] != 202021.25:
+ raise ValueError(f'Invalid .flo file format: magic number {magic}')
+
+ w = np.fromfile(f, np.int32, count=1)[0]
+ h = np.fromfile(f, np.int32, count=1)[0]
+ data = np.fromfile(f, np.float32, count=2*w*h)
+ flow = data.reshape(h, w, 2)
+ return flow
+
+def load_optical_flows(flow_dir, num_frames):
+ """
+ Load sequence of optical flow files.
+ """
+ flow_dir = Path(flow_dir)
+ flows = []
+
+ for i in range(num_frames - 1):
+ flow_path = flow_dir / f'flow_{i:04d}.flo'
+ if flow_path.exists():
+ flow = read_flo_file(flow_path)
+ flows.append(flow)
+ else:
+ raise ValueError("No flow files found")
+
+ return np.stack(flows, axis=0)
+
+def rasterize_vertex_flow(flow_vertices, meshes, faces, image_size, renderer, eps = 1e-8):
+ """
+ Rasterize per-vertex flow to dense flow field using barycentric interpolation.
+ """
+ B, V, _ = flow_vertices.shape
+ device = flow_vertices.device
+
+ if isinstance(image_size, int):
+ H = W = image_size
+ else:
+ H, W = image_size
+
+ batch_meshes = join_meshes_as_batch([join_meshes_as_scene(m) for m in meshes]).to(device)
+ fragments = renderer.renderer.rasterizer(batch_meshes)
+
+ pix_to_face = fragments.pix_to_face # (B, H, W, K)
+ bary_coords = fragments.bary_coords # (B, H, W, K, 3)
+
+ flow_scene_list = []
+ for mesh_idx in range(B):
+ mesh = meshes[mesh_idx]
+ V_mesh = mesh.verts_packed().shape[0]
+
+ if V_mesh > flow_vertices.shape[1]:
+ raise ValueError(f"Mesh {mesh_idx} has {V_mesh} vertices but flow has {flow_vertices.shape[1]}")
+
+ flow_scene_list.append(flow_vertices[mesh_idx, :V_mesh])
+
+
+ flow_vertices_scene = torch.cat(flow_scene_list, dim=0).to(device)
+ faces_scene = batch_meshes.faces_packed()
+
+ flow_pred = torch.zeros(B, H, W, 2, device=device)
+ valid = pix_to_face[..., 0] >= 0
+
+ for b in range(B):
+ b_valid = valid[b] # (H,W)
+ if torch.count_nonzero(b_valid) == 0:
+ print(f"No valid pixels found for batch {b}")
+ continue
+
+ valid_indices = torch.nonzero(b_valid, as_tuple=True)
+ h_indices, w_indices = valid_indices
+
+ face_idxs = pix_to_face[b, h_indices, w_indices, 0] # (N,)
+ bary = bary_coords[b, h_indices, w_indices, 0] # (N,3)
+
+ max_face_idx = faces_scene.shape[0] - 1
+ if face_idxs.max() > max_face_idx:
+ raise RuntimeError(f"Face index {face_idxs.max()} exceeds max {max_face_idx}")
+
+ face_verts = faces_scene[face_idxs] # (N, 3)
+ f0, f1, f2 = face_verts.unbind(-1) # Each (N,)
+
+ max_vert_idx = flow_vertices_scene.shape[0] - 1
+ if max(f0.max(), f1.max(), f2.max()) > max_vert_idx:
+ raise RuntimeError(f"Vertex index exceeds flow_vertices_scene size {max_vert_idx}")
+
+ v0_flow = flow_vertices_scene[f0] # (N, 2)
+ v1_flow = flow_vertices_scene[f1] # (N, 2)
+ v2_flow = flow_vertices_scene[f2] # (N, 2)
+
+ # Interpolate using barycentric coordinates
+ b0, b1, b2 = bary.unbind(-1) # Each (N,)
+
+ # Ensure barycentric coordinates sum to 1 (numerical stability)
+ bary_sum = b0 + b1 + b2
+ b0 = b0 / (bary_sum + eps)
+ b1 = b1 / (bary_sum + eps)
+ b2 = b2 / (bary_sum + eps)
+
+ flow_interpolated = (
+ b0.unsqueeze(-1) * v0_flow +
+ b1.unsqueeze(-1) * v1_flow +
+ b2.unsqueeze(-1) * v2_flow
+ ) # (N, 2)
+
+ # Update flow prediction
+ flow_pred[b, h_indices, w_indices] = flow_interpolated
+
+ return flow_pred
+
+def calculate_flow_loss(flow_dir, device, mask, renderer, model):
+ """
+ Calculate optical flow loss with improved error handling and flexibility.
+ """
+ if device is None:
+ device = mask.device
+
+ T = mask.shape[0]
+ H, W = mask.shape[1:3]
+
+    # Optical flow relates consecutive frames, so align the mask with frames 1..T-1
+    flow_mask = mask[1:]
+
+ flows_np = load_optical_flows(flow_dir, T)
+ flow_gt = torch.from_numpy(flows_np).float().to(device) # [T-1, H, W, 2]
+
+ vertices = model.deformed_vertices[0] # (T,V,3)
+ # Project vertices to get 2D flow
+ proj_t = renderer.project_points(vertices[:-1]) # (T-1,V,2) in pixels
+ proj_tp = renderer.project_points(vertices[1:])
+ vertex_flow = proj_tp - proj_t # (T-1,V,2) Δx,Δy
+
+ meshes = [model.get_mesh(t) for t in range(T)]
+ flow_pred = rasterize_vertex_flow(vertex_flow, meshes, model.faces[0], (H,W), renderer) # (B,H,W,2)
+
+ eps = 1e-3
+ diff = (flow_pred - flow_gt) * flow_mask.unsqueeze(-1) # (T-1, H, W, 2)
+ loss = torch.sqrt(diff.pow(2).sum(dim=-1) + eps**2) # Charbonnier loss
+ loss = loss.sum() / (flow_mask.sum() + 1e-6)
+
+ return loss
+
+def normalize_depth_from_reference(depth_maps, reference_idx=0, invalid_value=-1.0, invert=False, eps = 1e-8):
+ """
+ Normalize depth maps based on a reference frame with improved robustness.
+ """
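+    # A single affine mapping (scale, offset) is estimated from the reference
+    # frame's robust 1st/99th-percentile depth range and applied to every frame,
+    # so relative depth ordering across the sequence is preserved.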
+ if depth_maps.dim() != 3:
+ raise ValueError(f"Expected depth_maps with 3 dimensions, got {depth_maps.dim()}")
+
+ T, H, W = depth_maps.shape
+ device = depth_maps.device
+
+ reference_depth = depth_maps[reference_idx]
+ valid_mask = (
+ (reference_depth != invalid_value) &
+ (reference_depth > 1e-8) & # Avoid very small positive values
+ torch.isfinite(reference_depth) # Exclude inf/nan
+ )
+
+ valid_values = reference_depth[valid_mask]
+ min_depth = torch.quantile(valid_values, 0.01) # 1st percentile
+ max_depth = torch.quantile(valid_values, 0.99) # 99th percentile
+
+ depth_range = max_depth - min_depth
+    if depth_range < eps:
+        print(f"Warning: very small depth range ({depth_range.item():.6f}), using fallback normalization")
+        min_depth = valid_values.min()
+        max_depth = valid_values.max()
+        depth_range = torch.clamp(max_depth - min_depth, min=eps)
+
+    scale = 1.0 / depth_range
+    offset = -min_depth * scale
+
+ all_valid_mask = (
+ (depth_maps != invalid_value) &
+ (depth_maps > eps) &
+ torch.isfinite(depth_maps)
+ )
+
+ normalized_depths = torch.full_like(depth_maps, invalid_value)
+
+ if all_valid_mask.any():
+ normalized_values = depth_maps[all_valid_mask] * scale + offset
+
+ if invert:
+ normalized_values = 1.0 - normalized_values
+
+ normalized_depths[all_valid_mask] = normalized_values
+
+ return normalized_depths, scale.item(), offset.item()
+
+def compute_depth_loss_normalized(mono_depths, zbuf_depths, mask):
+ """
+ Compute normalized depth loss.
+ """
+ device = zbuf_depths.device
+ # Normalize both depth types
+ zbuf_norm, z_scale, z_offset = normalize_depth_from_reference(zbuf_depths)
+ mono_norm, m_scale, m_offset = normalize_depth_from_reference(mono_depths, invert=True)
+
+ valid_zbuf = (zbuf_norm >= 0) & (zbuf_norm <= 1)
+ valid_mono = (mono_norm >= 0) & (mono_norm <= 1)
+ if mask.dtype != torch.bool:
+ mask = mask > 0.5
+ combined_mask = mask & valid_zbuf & valid_mono
+
+ num_valid = combined_mask.sum().item()
+ if num_valid == 0:
+ print("No valid pixels for depth loss computation")
+ return torch.tensor(0.0, device=device, requires_grad=True)
+
+ depth_diff = (zbuf_norm - mono_norm) * combined_mask.float()
+ loss = (depth_diff**2).sum() / num_valid
+
+ return loss
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/misc.py b/third_party/Puppeteer/animation/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef1539ce1d3dbed473ce5f9eb4c479f7baa28331
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/misc.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from torch.optim.lr_scheduler import LambdaLR
+
+def warmup_then_decay(optimizer, total_steps, warmup_steps, max_lr=1e-3, min_lr=1e-5, base_lr=1e-5):
+ """
+ Create a learning rate scheduler with warmup followed by decay.
+ """
+ def lr_lambda(current_step):
+ if current_step < warmup_steps:
+ # warmup: min_lr -> max_lr
+ progress = float(current_step) / float(max(1, warmup_steps))
+ # LR(t) = min_lr + (max_lr - min_lr)*progress
+ return (min_lr + (max_lr - min_lr)*progress) / base_lr
+ else:
+ # decay: warmup_steps -> total_steps
+ progress = float(current_step - warmup_steps) / float(max(1, total_steps - warmup_steps))
+ # LR(t) = max_lr + (min_lr - max_lr)*progress
+ return (max_lr + (min_lr - max_lr)*progress) / base_lr
+
+ scheduler = LambdaLR(optimizer, lr_lambda)
+ return scheduler
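+
+# Usage sketch (illustrative; the optimizer's lr must equal `base_lr`, since LambdaLR
+# scales that initial value by the factor returned from lr_lambda):
+#
+#   optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
+#   scheduler = warmup_then_decay(optimizer, total_steps=1000, warmup_steps=100,
+#                                 max_lr=1e-3, min_lr=1e-5, base_lr=1e-5)
+#   for step in range(1000):
+#       optimizer.step()
+#       scheduler.step()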
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/quat_utils.py b/third_party/Puppeteer/animation/utils/quat_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f8eb43f00eaf9d7cd16a6168213133a4e4e99e1
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/quat_utils.py
@@ -0,0 +1,179 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from typing import List, Tuple, Optional
+
+EPS = 1e-8
+
+def normalize_quaternion(quat: torch.Tensor, eps: float = EPS) -> torch.Tensor:
+ """
+ Normalize quaternions to unit length.
+
+ Args:
+ quat: Quaternion tensor of shape (..., 4) with (w, x, y, z) format
+ eps: Small value for numerical stability
+
+ Returns:
+ Normalized quaternions of same shape
+ """
+ norm = torch.norm(quat, dim=-1, keepdim=True)
+ return quat / torch.clamp(norm, min=eps)
+
+def quat_multiply(q1: torch.Tensor, q2: torch.Tensor) -> torch.Tensor:
+ """
+ Multiply two quaternions using Hamilton product.
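+
+    Both inputs use (w, x, y, z) ordering; the composed rotation q1*q2 applies q2 first, then q1.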
+ """
+ w1, x1, y1, z1 = torch.unbind(q1, dim=-1)
+ w2, x2, y2, z2 = torch.unbind(q2, dim=-1)
+
+ w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
+ x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
+ y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
+ z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
+
+ return torch.stack((w, x, y, z), dim=-1)
+
+def quat_conjugate(quat: torch.Tensor) -> torch.Tensor:
+ """
+ Compute quaternion conjugate.
+ """
+ w, xyz = quat[..., :1], quat[..., 1:]
+ return torch.cat([w, -xyz], dim=-1)
+
+def quat_inverse(quat: torch.Tensor, eps: float = EPS) -> torch.Tensor:
+ """
+ Compute quaternion inverse.
+ """
+ conjugate = quat_conjugate(quat)
+ norm_squared = torch.sum(quat * quat, dim=-1, keepdim=True)
+ return conjugate / torch.clamp(norm_squared, min=eps)
+
+def quat_log(quat: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
+ """
+ Compute quaternion logarithm, mapping to rotation vectors (axis-angle).
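+
+    For a unit quaternion (cos(θ/2), sin(θ/2)·u) the result is (θ/2)·u, i.e. the magnitude of the
+    returned vector is half the rotation angle.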
+ """
+    q_norm = torch.sqrt(torch.sum(quat * quat, dim=-1, keepdim=True))
+    quat_norm = quat / torch.clamp(q_norm, min=eps)
+
+ w = quat_norm[..., 0:1] # Scalar part
+ xyz = quat_norm[..., 1:] # Vector part
+
+ xyz_norm = torch.norm(xyz, dim=-1, keepdim=True)
+ w_clamped = torch.clamp(w, min=-1.0 + eps, max=1.0 - eps)
+
+ # half-angle
+ half_angle = torch.acos(torch.abs(w_clamped))
+
+ safe_xyz_norm = torch.clamp(xyz_norm, min=eps)
+
+ # Scale factor
+ scale = torch.where(
+ xyz_norm < eps,
+ torch.ones_like(xyz_norm),
+ half_angle / safe_xyz_norm
+ )
+
+ # Handle quaternion sign ambiguity (q and -q represent same rotation)
+ sign = torch.where(w >= 0, torch.ones_like(w), -torch.ones_like(w))
+
+ rotation_vector = sign * scale * xyz
+
+ return rotation_vector
+
+def quat_rotate_vector(quat: torch.Tensor, vec: torch.Tensor) -> torch.Tensor:
+ """
+ Rotate a 3D vector by a quaternion.
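+
+    Uses v' = v + 2*w*(q_vec × v) + 2*(q_vec × (q_vec × v)), which assumes `quat` is unit-normalized.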
+ """
+ q_vec = quat[..., 1:] # vector part
+ q_w = quat[..., 0:1] # scalar part
+
+ cross1 = torch.cross(q_vec, vec, dim=-1)
+ cross2 = torch.cross(q_vec, cross1, dim=-1)
+
+ # Apply the rotation formula
+ rotated_vec = vec + 2.0 * q_w * cross1 + 2.0 * cross2
+
+ return rotated_vec
+
+def quat_to_rotation_matrix(quat: torch.Tensor, eps: float = EPS) -> torch.Tensor:
+ """
+ Convert quaternions to rotation matrices.
+ """
+ quat_norm = normalize_quaternion(quat, eps)
+ w, x, y, z = torch.unbind(quat_norm, dim=-1)
+
+ xx, yy, zz = x * x, y * y, z * z
+ xy, xz, yz = x * y, x * z, y * z
+ wx, wy, wz = w * x, w * y, w * z
+
+ r00 = 1.0 - 2.0 * (yy + zz)
+ r01 = 2.0 * (xy - wz)
+ r02 = 2.0 * (xz + wy)
+
+ r10 = 2.0 * (xy + wz)
+ r11 = 1.0 - 2.0 * (xx + zz)
+ r12 = 2.0 * (yz - wx)
+
+ r20 = 2.0 * (xz - wy)
+ r21 = 2.0 * (yz + wx)
+ r22 = 1.0 - 2.0 * (xx + yy)
+
+ rotation_matrix = torch.stack([
+ r00, r01, r02,
+ r10, r11, r12,
+ r20, r21, r22
+ ], dim=-1)
+
+ return rotation_matrix.reshape(quat.shape[:-1] + (3, 3))
+
+def quat_to_transform_matrix(quat: torch.Tensor, pos: torch.Tensor) -> torch.Tensor:
+ """
+ Convert quaternion and position to 4x4 transformation matrix.
+ """
+ # rotation part
+ rotation = quat_to_rotation_matrix(quat)
+ batch_shape = rotation.shape[:-2]
+
+ # homogeneous transformation matrix
+ transform = torch.zeros(batch_shape + (4, 4), dtype=rotation.dtype, device=rotation.device)
+ transform[..., :3, :3] = rotation
+ transform[..., :3, 3] = pos
+ transform[..., 3, 3] = 1.0
+
+ return transform
+
+def compute_rest_local_positions(
+ joint_positions: torch.Tensor,
+ parent_indices: List[int]
+) -> torch.Tensor:
+ """
+ Compute local positions relative to parent joints from global joint positions.
+ """
+
+ num_joints = joint_positions.shape[0]
+ local_positions = torch.zeros_like(joint_positions)
+
+ for j in range(num_joints):
+ parent_idx = parent_indices[j]
+
+ if parent_idx >= 0 and parent_idx != j and parent_idx < num_joints:
+ # Child joint: local offset = global_pos - parent_global_pos
+ local_positions[j] = joint_positions[j] - joint_positions[parent_idx]
+ else:
+ # Root joint: use global position as local position
+ local_positions[j] = joint_positions[j]
+
+ return local_positions
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/render_first_frame.py b/third_party/Puppeteer/animation/utils/render_first_frame.py
new file mode 100644
index 0000000000000000000000000000000000000000..94fee25099012897b1edb620d2ef827883d6ea49
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/render_first_frame.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import json
+import argparse
+from pathlib import Path
+
+import torch
+from pytorch3d.io import load_objs_as_meshes
+from pytorch3d.renderer import TexturesVertex
+from pytorch3d.structures import Meshes
+from PIL import Image
+
+from renderer import MeshRenderer3D
+from utils.save_utils import render_single_mesh
+
+
+def render_mesh_all_cameras(mesh_path, cameras_dir, output_dir="renders", image_size=512, device="cuda:0"):
+ """
+ Render mesh from all camera viewpoints in the cameras directory.
+
+ Args:
+ mesh_path: Path to OBJ mesh file
+ cameras_dir: Directory containing camera JSON config files
+ output_dir: Output directory for rendered images
+ image_size: Output image size
+ device: Device to use
+ """
+ cameras_dir = Path(cameras_dir)
+ output_dir = Path(output_dir)
+
+ # Find all JSON camera config files
+ json_files = list(cameras_dir.glob("*.json"))
+ if not json_files:
+ print(f"No JSON camera files found in {cameras_dir}")
+ return
+
+ print(f"Found {len(json_files)} camera configurations")
+
+ # Render from each camera viewpoint
+ for json_file in json_files:
+ # Load camera config
+ with open(json_file, 'r') as f:
+ cam_params = json.load(f)
+
+ # Setup renderer for this camera
+ renderer = MeshRenderer3D(device=device, image_size=image_size, cam_params=cam_params)
+
+ camera_name = json_file.stem
+ output_path = output_dir / f"render_{camera_name}.png"
+
+ render_single_mesh(renderer, mesh_path, str(output_path))
+
+ print(f"All renders saved to: {output_dir}")
+
+def main():
+ parser = argparse.ArgumentParser(description="Render a mesh to an image")
+ parser.add_argument('--input_path', type=str, help="base input path")
+ parser.add_argument('--seq_name', type=str, help="sequence name")
+ parser.add_argument("--cameras_dir", default="utils/cameras", help="Camera config JSON file")
+ parser.add_argument("-s", "--size", type=int, default=512, help="Image size")
+ parser.add_argument("-d", "--device", default="cuda:0", help="Device to use")
+
+ args = parser.parse_args()
+
+ mesh_path = f'{args.input_path}/{args.seq_name}/objs/mesh.obj'
+    if not os.path.exists(mesh_path):
+        print(f"Error: Mesh file not found: {mesh_path}")
+        return
+ output_dir = f'{args.input_path}/{args.seq_name}/first_frames/'
+ os.makedirs(output_dir, exist_ok=True)
+
+ render_mesh_all_cameras(
+ mesh_path=mesh_path,
+ cameras_dir=args.cameras_dir,
+ output_dir=output_dir,
+ image_size=args.size,
+ device=args.device
+ )
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/third_party/Puppeteer/animation/utils/save_flow.py b/third_party/Puppeteer/animation/utils/save_flow.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3528212cd99fadd795b40aa24d67f19573d5e75
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/save_flow.py
@@ -0,0 +1,297 @@
+# Copyright (c) 2021 Henrique Morimitsu
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: Apache License 2.0
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under Apache License 2.0, with the full license text
+# available at https://github.com/hmorimitsu/ptlflow/blob/main/LICENSE.
+#
+# This modified file is released under the same license.
+
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+This module processes PNG frame sequences to generate optical flow using PTLFlow,
+with support for visualization and video generation.
+"""
+
+import argparse
+import os
+import subprocess
+import shutil
+import logging
+from pathlib import Path
+from typing import List, Tuple, Optional, Union
+
+import cv2 as cv
+import torch
+import numpy as np
+from tqdm import tqdm
+
+from third_partys.ptlflow.ptlflow.utils import flow_utils
+from third_partys.ptlflow.ptlflow.utils.io_adapter import IOAdapter
+import third_partys.ptlflow.ptlflow as ptlflow
+
+class OpticalFlowProcessor:
+ """Handles optical flow computation and visualization."""
+
+ def __init__(
+ self,
+ model_name: str = 'dpflow',
+ checkpoint: str = 'sintel',
+ device: Optional[str] = None,
+ resize_to: Optional[Tuple[int, int]] = None
+ ):
+ """
+ Initialize optical flow processor.
+
+ Args:
+ model_name: Name of the flow model to use
+ checkpoint: Checkpoint/dataset name for the model
+ device: Device to run on (auto-detect if None)
+ resize_to: Optional (width, height) to resize frames
+ """
+ self.model_name = model_name
+ self.checkpoint = checkpoint
+ self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
+ self.resize_to = resize_to
+
+ # Initialize model
+ self.model = ptlflow.get_model(model_name, ckpt_path=checkpoint).to(self.device).eval()
+ print(f"Loaded {model_name} model on {self.device}")
+
+ self.io_adapter = None
+
+ def load_frame_sequence(self, frames_dir: Union[str, Path]) -> Tuple[List[np.ndarray], List[Path]]:
+ """
+ Load PNG frame sequence from directory.
+ """
+ frames_dir = Path(frames_dir)
+
+ if not frames_dir.exists():
+ raise FileNotFoundError(f"Frames directory not found: {frames_dir}")
+
+ # Find PNG files and sort naturally
+ png_files = list(frames_dir.glob('*.png'))
+ if len(png_files) < 2:
+ raise ValueError(f"Need at least 2 PNG frames, found {len(png_files)} in {frames_dir}")
+
+ # Natural sorting for proper frame order
+ png_files.sort(key=lambda x: self._natural_sort_key(x.name))
+
+ frames = []
+ for png_path in tqdm(png_files, desc="Loading frames"):
+ # Load image in color
+            img_bgr = cv.imread(str(png_path), cv.IMREAD_COLOR)
+            if img_bgr is None:
+                raise IOError(f"Failed to read image: {png_path}")
+
+            if self.resize_to:
+                img_bgr = cv.resize(img_bgr, self.resize_to, interpolation=cv.INTER_LINEAR)
+
+ img_rgb = cv.cvtColor(img_bgr, cv.COLOR_BGR2RGB)
+ frames.append(img_rgb)
+
+ return frames, png_files
+
+ def _natural_sort_key(self, filename: str) -> List[Union[int, str]]:
+ """Natural sorting key for filenames with numbers."""
+ import re
+ return [int(text) if text.isdigit() else text.lower()
+ for text in re.split('([0-9]+)', filename)]
+
+ def compute_optical_flow_sequence(
+ self,
+ frames: List[np.ndarray],
+ flow_vis_dir: Union[str, Path],
+ flow_save_dir: Optional[Union[str, Path]] = None,
+ save_visualizations: bool = True
+ ) -> List[torch.Tensor]:
+ """
+ Compute optical flow for entire frame sequence.
+ """
+ if len(frames) < 2:
+ raise ValueError("Need at least 2 frames for optical flow")
+
+ flow_vis_dir = Path(flow_vis_dir)
+ flow_save_dir = Path(flow_save_dir) if flow_save_dir else flow_vis_dir
+
+ H, W = frames[0].shape[:2]
+
+ # Initialize IO adapter
+ if self.io_adapter is None:
+ self.io_adapter = IOAdapter(self.model, (H, W))
+
+ flows = []
+ for i in tqdm(range(len(frames) - 1), desc="Computing optical flow"):
+ # Prepare frame pair
+ frame_pair = [frames[i], frames[i + 1]]
+ raw_inputs = self.io_adapter.prepare_inputs(frame_pair)
+
+ imgs = raw_inputs['images'][0] # (2, 3, H, W)
+
+            pair_tensor = torch.stack((imgs[0:1], imgs[1:2]), dim=1).squeeze(0)  # (2, 3, H, W)
+ pair_tensor = pair_tensor.to(self.device, non_blocking=True).contiguous()
+
+ with torch.no_grad():
+ flow_result = self.model({'images': pair_tensor.unsqueeze(0)})
+ flow = flow_result['flows'][0] # (1, 2, H, W)
+
+ flows.append(flow)
+
+ if save_visualizations:
+ self._save_flow_outputs(flow, i, flow_vis_dir, flow_save_dir)
+
+ return flows
+
+ def _save_flow_outputs(
+ self,
+ flow_tensor: torch.Tensor,
+ frame_idx: int,
+ viz_dir: Path,
+ flow_dir: Path
+ ) -> None:
+ """Save flow outputs in both .flo and visualization formats."""
+ # Save raw flow (.flo format)
+ flow_hw2 = flow_tensor[0] # (2, H, W)
+ flow_np = flow_hw2.permute(1, 2, 0).cpu().numpy() # (H, W, 2)
+
+ flow_path = flow_dir / f'flow_{frame_idx:04d}.flo'
+ flow_utils.flow_write(flow_path, flow_np)
+
+ # Save visualization
+ flow_rgb = flow_utils.flow_to_rgb(flow_tensor)[0] # Remove batch dimension
+
+ if flow_rgb.dim() == 4: # (Npred, 3, H, W)
+ flow_rgb = flow_rgb[0]
+
+ flow_rgb_np = (flow_rgb * 255).byte().permute(1, 2, 0).cpu().numpy() # (H, W, 3)
+ viz_bgr = cv.cvtColor(flow_rgb_np, cv.COLOR_RGB2BGR)
+
+ viz_path = viz_dir / f'flow_viz_{frame_idx:04d}.png'
+ cv.imwrite(str(viz_path), viz_bgr)
+
+def create_flow_video(
+ image_dir: Union[str, Path],
+ output_filename: str = 'flow.mp4',
+ fps: int = 10,
+ pattern: str = 'flow_viz_*.png',
+ cleanup_temp: bool = True
+) -> bool:
+ """
+ Create MP4 video from flow visualization images.
+ """
+ image_dir = Path(image_dir)
+
+    if not image_dir.exists():
+        print(f"Image directory not found: {image_dir}")
+        return False
+
+    image_files = sorted(image_dir.glob(pattern))
+    if not image_files:
+        print(f"No images found matching pattern '{pattern}' in {image_dir}")
+        return False
+
+ temp_dir = image_dir / 'temp_sequence'
+ temp_dir.mkdir(exist_ok=True)
+
+ try:
+ # Copy files with sequential naming
+ for i, img_file in enumerate(image_files):
+ temp_name = temp_dir / f'frame_{i:05d}.png'
+ shutil.copy2(img_file, temp_name)
+
+ # Create video using ffmpeg
+ output_path = image_dir / output_filename
+
+ cmd = [
+ 'ffmpeg', '-y',
+ '-framerate', str(fps),
+ '-i', str(temp_dir / 'frame_%05d.png'),
+ '-c:v', 'libx264',
+ '-pix_fmt', 'yuv420p',
+ str(output_path)
+ ]
+
+ subprocess.run(
+ cmd,
+ capture_output=True,
+ text=True,
+ check=True
+ )
+ return True
+ except Exception as e:
+ print(f"Video creation failed: {e}")
+ return False
+ finally:
+ if cleanup_temp and temp_dir.exists():
+ shutil.rmtree(temp_dir)
+
+def main(
+ frames_dir: Union[str, Path],
+ flow_vis_dir: Union[str, Path] = 'flow_out',
+ flow_save_dir: Optional[Union[str, Path]] = None,
+ resize_to: Optional[Tuple[int, int]] = None,
+ model_name: str = 'dpflow',
+    checkpoint: str = 'sintel',
+    fps: int = 10
+) -> bool:
+
+ # Initialize processor
+ processor = OpticalFlowProcessor(
+ model_name=model_name,
+ checkpoint=checkpoint,
+ resize_to=resize_to
+ )
+
+ # Load frames
+ frames, png_paths = processor.load_frame_sequence(frames_dir)
+
+ # Compute optical flow
+ flows = processor.compute_optical_flow_sequence(
+ frames=frames,
+ flow_vis_dir=flow_vis_dir,
+ flow_save_dir=flow_save_dir,
+ save_visualizations=True
+ )
+
+    # Create video and report success
+    return create_flow_video(flow_vis_dir, fps=fps)
+
+def get_parser():
+ parser = argparse.ArgumentParser(description="Optical flow inference on frame sequences")
+
+ parser.add_argument('--input_path', type=str, help="base input path")
+ parser.add_argument('--seq_name', type=str, help="sequence name")
+ parser.add_argument('--model_name', type=str, default='dpflow', help="Optical flow model to use")
+ parser.add_argument('--checkpoint', type=str, default='sintel', help="Model checkpoint/dataset name")
+ parser.add_argument('--resize_width', type=int, default=None, help="Resize frame width (must specify both width and height)")
+ parser.add_argument('--resize_height', type=int, default=None, help="Resize frame height (must specify both width and height)")
+ parser.add_argument('--fps', type=int, default=10, help="Frame rate for output video")
+
+ return parser
+
+if __name__ == '__main__':
+ parser = get_parser()
+ args = parser.parse_args()
+
+ # Path
+ frames_dir = f'{args.input_path}/{args.seq_name}/imgs'
+ flow_vis_dir = frames_dir.replace("imgs", "flow_vis")
+ flow_save_dir = frames_dir.replace("imgs", "flow")
+
+ os.makedirs(flow_vis_dir, exist_ok=True)
+ os.makedirs(flow_save_dir, exist_ok=True)
+
+ # Prepare resize parameter
+ resize_to = None
+ if args.resize_width and args.resize_height:
+ resize_to = (args.resize_width, args.resize_height)
+
+ # Process optical flow
+ success = main(
+ frames_dir=frames_dir,
+ flow_vis_dir=flow_vis_dir,
+ flow_save_dir=flow_save_dir,
+        resize_to=resize_to,
+        model_name=args.model_name,
+        checkpoint=args.checkpoint,
+        fps=args.fps
+    )
+
+    if success:
+        print("Optical flow processing completed successfully")
+    else:
+        print("Optical flow processing finished, but video creation failed")
diff --git a/third_party/Puppeteer/animation/utils/save_utils.py b/third_party/Puppeteer/animation/utils/save_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c46e4a47349ff05690944e79ab7385a2727326cc
--- /dev/null
+++ b/third_party/Puppeteer/animation/utils/save_utils.py
@@ -0,0 +1,374 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pytorch3d.io import load_obj
+from pytorch3d.renderer import TexturesAtlas
+from pytorch3d.structures import Meshes
+
+import os
+import torch
+import json
+import numpy as np
+from tqdm import tqdm
+from pathlib import Path
+import subprocess
+from PIL import Image
+from scipy.ndimage import gaussian_filter1d
+from third_partys.co_tracker.save_track import save_track
+
+def render_single_mesh(renderer, mesh_path, out_path="render_result.png", atlas_size=8):
+ """
+ Test render a single mesh and save the result.
+ """
+ device = renderer.device
+
+ verts, faces, aux = load_obj(
+ mesh_path,
+ device=device,
+ load_textures=True,
+ create_texture_atlas=True,
+ texture_atlas_size=atlas_size,
+ texture_wrap="repeat"
+ )
+ atlas = aux.texture_atlas # (F, atlas_size, atlas_size, 3)
+
+ vmin, vmax = verts.min(0).values, verts.max(0).values
+ center = (vmax + vmin) / 2.
+ scale = (vmax - vmin).max()
+ verts = (verts - center) / scale
+
+ mesh_norm = Meshes(
+ verts=[verts],
+ faces=[faces.verts_idx],
+ textures=TexturesAtlas(atlas=[atlas])
+ )
+ with torch.no_grad():
+ rendered = renderer.render(mesh_norm) # shape=[1, H, W, 4]
+
+ rendered_img = renderer.tensor_to_image(rendered)
+
+ pil_img = Image.fromarray(rendered_img)
+ pil_img.save(out_path)
+ print(f"Saved render to {out_path}")
+
+def apply_gaussian_smoothing(data, sigma = 1.0, preserve_first_frame = True, eps = 1e-8):
+ """
+ Apply Gaussian smoothing along the time axis with quaternion normalization.
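+
+    Note: smoothing quaternion components independently and renormalizing is an approximation;
+    it assumes neighboring quaternions already lie in the same hemisphere (no sign flips).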
+ """
+ smoothed = gaussian_filter1d(data, sigma=sigma, axis=0)
+
+ # Preserve first frame if requested
+ if preserve_first_frame and data.shape[0] > 0:
+ smoothed[0] = data[0]
+
+ if data.shape[-1] == 4:
+ norms = np.linalg.norm(smoothed, axis=-1, keepdims=True)
+ smoothed = smoothed / np.maximum(norms, eps)
+
+ return smoothed
+
+def render_single_view_sequence(quats, root_quats, root_pos, renderer, model, output_dir, view_name, fps = 25):
+ """
+ Render animation sequence from a single viewpoint.
+ """
+ output_dir = Path(output_dir)
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ T = quats.shape[0]
+
+ model.animate(quats, root_quats, root_pos)
+
+ for i in tqdm(range(T), desc=f"Rendering {view_name}"):
+ mesh = model.get_mesh(i)
+ rendered = renderer.render(mesh)
+
+ img_array = renderer.tensor_to_image(rendered)
+ img = Image.fromarray(img_array)
+
+ frame_path = output_dir / f"{view_name}_frame_{i:04d}.png"
+ img.save(frame_path)
+
+ # Create video
+ video_path = output_dir / f"{view_name}_output_video.mp4"
+ cmd = f"ffmpeg -y -framerate {fps} -i {output_dir}/{view_name}_frame_%04d.png -c:v libx264 -pix_fmt yuv420p {video_path}"
+ subprocess.call(cmd, shell=True)
+
+def save_and_smooth_results(args, model, renderer, final_quats, root_quats, root_pos, out_dir, additional_renderers = None, load_pt = False, sigma = 1.0, fps = 25):
+ """
+ Save and smooth animation results with multi-view rendering.
+ """
+ device = final_quats.device
+ T = final_quats.shape[0]
+ # Save Raw Results
+ if not load_pt:
+ raw_dir = os.path.join(out_dir, "raw")
+ os.makedirs(raw_dir, exist_ok=True)
+
+ torch.save(final_quats, os.path.join(raw_dir, "local_quats.pt"))
+ torch.save(root_quats, os.path.join(raw_dir, "root_quats.pt"))
+ torch.save(root_pos, os.path.join(raw_dir, "root_pos.pt"))
+ if hasattr(model, 'rest_local_positions'):
+ torch.save(model.rest_local_positions, os.path.join(raw_dir, "rest_local_positions.pt"))
+
+ print(f"Saved raw motion to {raw_dir}")
+
+ quats_np = final_quats.cpu().numpy()
+ root_quats_np = root_quats.cpu().numpy()
+ root_pos_np = root_pos.cpu().numpy()
+
+ # Apply Gaussian smoothing if enabled
+ if args.gauss_filter:
+ print(f"Applying Gaussian smoothing (sigma={sigma})")
+
+ smooth_quats_np = apply_gaussian_smoothing(
+ quats_np, sigma=sigma, preserve_first_frame=True
+ )
+ smooth_root_quats_np = apply_gaussian_smoothing(
+ root_quats_np, sigma=sigma, preserve_first_frame=True
+ )
+ smooth_root_pos_np = apply_gaussian_smoothing(
+ root_pos_np, sigma=sigma, preserve_first_frame=True
+ )
+ smooth_dir = os.path.join(out_dir, "smoothed")
+ os.makedirs(smooth_dir, exist_ok=True)
+ save_dir = smooth_dir
+
+    else:
+        smooth_quats_np = quats_np
+        smooth_root_quats_np = root_quats_np
+        smooth_root_pos_np = root_pos_np
+        # raw_dir is only created when load_pt is False, so rebuild the path here
+        save_dir = os.path.join(out_dir, "raw")
+        os.makedirs(save_dir, exist_ok=True)
+
+ smooth_quats = torch.tensor(smooth_quats_np, dtype=torch.float32, device=device)
+ smooth_root_quats = torch.tensor(smooth_root_quats_np, dtype=torch.float32, device=device)
+ smooth_root_pos = torch.tensor(smooth_root_pos_np, dtype=torch.float32, device=device)
+
+ # Render Sequences
+ if not load_pt and args.gauss_filter:
+ smooth_dir_path = Path(smooth_dir)
+ torch.save(smooth_quats, smooth_dir_path / "local_quats.pt")
+ torch.save(smooth_root_quats, smooth_dir_path / "root_quats.pt")
+ torch.save(smooth_root_pos, smooth_dir_path / "root_pos.pt")
+ print(f"Saved smoothed motion to {smooth_dir}")
+
+ # Render main view
+ print(f"Rendering {args.main_renderer} view ({T} frames)")
+ render_single_view_sequence(
+ smooth_quats, smooth_root_quats, smooth_root_pos,
+ renderer, model, save_dir, args.main_renderer, fps
+ )
+
+ # Render additional views if provided
+ if additional_renderers:
+ for renderer_key, view_renderer in additional_renderers.items():
+ view_name = renderer_key.replace("_renderer", "")
+ render_single_view_sequence(
+ smooth_quats, smooth_root_quats, smooth_root_pos,
+ view_renderer, model, save_dir, view_name, fps
+ )
+
+def save_args(args, output_dir, filename="config.json"):
+ args_dict = vars(args)
+ os.makedirs(output_dir, exist_ok=True)
+
+ config_path = os.path.join(output_dir, filename)
+ with open(config_path, 'w') as f:
+ json.dump(args_dict, f, indent=4)
+
+def visualize_joints_on_mesh(model, renderer, seq_name, out_dir):
+ """
+ Render mesh with joint visualizations and return visibility mask.
+ """
+ joints_2d = renderer.project_points(model.joints_rest)
+
+ mesh = model.get_mesh()
+ image_with_joints, vis_mask = renderer.render_with_points(mesh, model.joints_rest)
+ image_np = image_with_joints[0].cpu().numpy()
+ if image_np.shape[2] == 4:
+ image_rgb = image_np[..., :3]
+ else:
+ image_rgb = image_np
+ if image_rgb.max() <= 1.0:
+ image_rgb = (image_rgb * 255).astype(np.uint8)
+ img = Image.fromarray(image_rgb)
+ output_path = f"{out_dir}/mesh_with_joints_{seq_name}_visible.png"
+ img.save(output_path)
+ return vis_mask
+
+def visualize_points_on_mesh(model, renderer, seq_name, out_dir):
+ """
+ Render mesh with point visualizations and return visibility mask.
+ """
+ points_2d = renderer.project_points(model.vertices[0])
+
+ mesh = model.get_mesh()
+ image_with_points, vis_mask = renderer.render_with_points(mesh, model.vertices[0], for_vertices=True)
+ image_np = image_with_points[0].cpu().numpy()
+ if image_np.shape[2] == 4:
+ image_rgb = image_np[..., :3]
+ else:
+ image_rgb = image_np
+ if image_rgb.max() <= 1.0:
+ image_rgb = (image_rgb * 255).astype(np.uint8)
+ img = Image.fromarray(image_rgb)
+ output_path = f"{out_dir}/mesh_with_verts_{seq_name}_visible.png"
+ img.save(output_path)
+ return vis_mask
+
+def save_track_points(point_vis_mask, renderer, model, img_path, out_dir, args):
+ """
+ Save and track selected points on the mesh with intelligent sampling.
+ """
+
+ vertex_project_2d = renderer.project_points(model.vertices[0])
+ visible_indices = torch.where(point_vis_mask)[0]
+
+ track_2d_point_path = img_path.replace('imgs', 'track_2d_verts')
+ os.makedirs(track_2d_point_path, exist_ok=True)
+
+ num_visible = len(visible_indices)
+ MAX_VISIBLE_POINTS = 15000
+ MAX_SAMPLE_POINTS = 4000
+
+ # Determine tracking strategy
+ tracking_mode = "full" if num_visible <= MAX_VISIBLE_POINTS else "sampled"
+
+ if not os.listdir(track_2d_point_path):
+ # Generate new tracking data
+ if tracking_mode == "full":
+ print(f"Saving tracks for all visible vertices (count: {num_visible})")
+
+ # Track all visible points
+ visible_vertex_project_2d = vertex_project_2d[visible_indices]
+ track_2d_point = save_track(
+ args.seq_name, visible_vertex_project_2d, img_path,
+ track_2d_point_path, out_dir, for_point=True
+ )
+
+ np.save(f'{track_2d_point_path}/visible_indices.npy',
+ visible_indices.cpu().numpy())
+
+ # Sample subset for final use
+ num_sample = min(MAX_SAMPLE_POINTS, num_visible)
+ sampled_local_indices = torch.randperm(num_visible)[:num_sample]
+ sampled_vertex_indices = visible_indices[sampled_local_indices]
+ np.save(f'{track_2d_point_path}/sampled_indices.npy',
+ sampled_vertex_indices.cpu().numpy())
+
+ else:
+ print(f"Too many visible vertices ({num_visible} > {MAX_VISIBLE_POINTS}), "
+ f"tracking only {MAX_SAMPLE_POINTS} sampled vertices")
+
+ # Sample points directly from visible set
+ num_sample = min(MAX_SAMPLE_POINTS, num_visible)
+ sampled_local_indices = torch.randperm(num_visible)[:num_sample]
+ sampled_vertex_indices = visible_indices[sampled_local_indices]
+
+ # Track only sampled points
+ sampled_vertex_project_2d = vertex_project_2d[sampled_vertex_indices]
+ track_2d_point = save_track(
+ args.seq_name, sampled_vertex_project_2d, img_path,
+ track_2d_point_path, out_dir, for_point=True
+ )
+
+ np.save(f'{track_2d_point_path}/visible_indices.npy',
+ visible_indices.cpu().numpy())
+ np.save(f'{track_2d_point_path}/sampled_indices.npy',
+ sampled_vertex_indices.cpu().numpy())
+
+ else:
+ # Load existing tracking data
+ print("Loading existing vertex tracks")
+ track_2d_point = np.load(f'{track_2d_point_path}/pred_tracks.npy')
+
+ visible_indices = np.load(f'{track_2d_point_path}/visible_indices.npy')
+ visible_indices = torch.from_numpy(visible_indices).long().to(args.device)
+
+ sampled_vertex_indices = np.load(f'{track_2d_point_path}/sampled_indices.npy')
+ sampled_vertex_indices = torch.from_numpy(sampled_vertex_indices).long().to(args.device)
+
+ track_2d_point = torch.from_numpy(track_2d_point).float().to(args.device)
+
+ # Create index mapping for tracking data
+ if tracking_mode == "full":
+ # Map from original vertex indices to positions in tracking data
+ vertex_to_track_idx = {idx.item(): i for i, idx in enumerate(visible_indices)}
+
+ track_indices = torch.tensor(
+ [vertex_to_track_idx[idx.item()] for idx in sampled_vertex_indices],
+ device=args.device, dtype=torch.long
+ )
+ else:
+ # Direct mapping for sampled-only tracking
+ track_indices = torch.arange(len(sampled_vertex_indices),
+ device=args.device, dtype=torch.long)
+
+ return track_2d_point, track_indices, sampled_vertex_indices
+
+def save_final_video(args):
+
+    additional_views = [view.strip() for view in args.additional_renderers.split(',') if view.strip()]
+    additional_views = [view for view in additional_views if view != args.main_renderer]
+    if len(additional_views) > 3:
+        additional_views = additional_views[:3]
+
+    save_dir = 'raw' if not args.gauss_filter else 'smoothed'
+ cmd = (
+ f'ffmpeg '
+ f'-i {args.input_path}/{args.seq_name}/input.mp4 '
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{args.main_renderer}_output_video.mp4 '
+ '-filter_complex "'
+ '[0:v][1:v]hstack=inputs=2[stacked]; '
+ '[stacked]drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'gt\':x=(w/4-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'ours\':x=(3*w/4-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10" '
+ f'-c:a copy {args.save_path}/{args.seq_name}/{args.save_name}/concat_output.mp4'
+ )
+
+    subprocess.call(cmd, shell=True)
+
+    # The 5-way concat below requires three additional views
+    if len(additional_views) < 3:
+        return
+
+    cmd = (
+ f'ffmpeg '
+ f'-i {args.input_path}/{args.seq_name}/input.mp4 '
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{args.main_renderer}_output_video.mp4 '
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{additional_views[0]}_output_video.mp4 '
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{additional_views[1]}_output_video.mp4 '
+ f'-i {args.save_path}/{args.seq_name}/{args.save_name}/{save_dir}/{additional_views[2]}_output_video.mp4 '
+ '-filter_complex "'
+ '[0:v][1:v][2:v][3:v][4:v]hstack=inputs=5[stacked]; '
+ '[stacked]drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'gt\':x=(w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'{args.main_renderer}\':x=(3*w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'{additional_views[0]}\':x=(5*w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'{additional_views[1]}\':x=(7*w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10, '
+ f'drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:text=\'{additional_views[2]}\':x=(9*w/10-text_w/2):y=20:fontsize=24:fontcolor=white:box=1:boxcolor=black:boxborderw=10" '
+ f'-c:a copy {args.save_path}/{args.seq_name}/{args.save_name}/concat_output_4view.mp4'
+ )
+ subprocess.call(cmd, shell=True)
+
+def load_motion_data(motion_dir, device="cuda:0"):
+ """
+ Load saved motion data.
+ """
+ local_quats = torch.load(os.path.join(motion_dir, "local_quats.pt"), map_location=device)
+ root_quats = torch.load(os.path.join(motion_dir, "root_quats.pt"), map_location=device)
+ root_pos = torch.load(os.path.join(motion_dir, "root_pos.pt"), map_location=device)
+
+ # Load rest positions if available (for reference)
+ rest_pos_path = os.path.join(motion_dir, "rest_local_positions.pt")
+ if os.path.exists(rest_pos_path):
+ rest_positions = torch.load(rest_pos_path, map_location=device)
+ else:
+ rest_positions = None
+ print("Warning: rest_local_positions.pt not found, model should have them initialized")
+
+ return local_quats, root_quats, root_pos, rest_positions
\ No newline at end of file
diff --git a/third_party/Puppeteer/checkpoints/rig.ckpt b/third_party/Puppeteer/checkpoints/rig.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..db36da72118177ff285f1f68549a022964955efd
--- /dev/null
+++ b/third_party/Puppeteer/checkpoints/rig.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0019dfc4b32d63c1392aa264aed2253c1e0c2fb09216f8e2cc269bbfb8bb49b5
+size 9
diff --git a/third_party/Puppeteer/demo_animation.sh b/third_party/Puppeteer/demo_animation.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6c7b368b9446df2e91eafc72e319e7e66f05ab4c
--- /dev/null
+++ b/third_party/Puppeteer/demo_animation.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+echo "Running animation..."
+
+# copy rig and mesh for animation
+for txt_file in results/final_rigging/*.txt; do
+ if [ -f "$txt_file" ]; then
+ seq_name=$(basename "$txt_file" .txt)
+
+ mkdir -p "examples/$seq_name/objs/"
+
+ cp "$txt_file" "examples/$seq_name/objs/rig.txt"
+ echo "Copied $txt_file -> examples/$seq_name/objs/rig.txt"
+
+ obj_file="examples/$seq_name.obj"
+ if [ -f "$obj_file" ]; then
+ cp "$obj_file" "examples/$seq_name/objs/mesh.obj"
+ echo "Copied $obj_file -> examples/$seq_name/objs/mesh.obj"
+ else
+ echo "Warning: $obj_file not found"
+ fi
+
+ # extract frames
+ video_file="examples/$seq_name/input.mp4"
+ if [ -f "$video_file" ]; then
+ echo "Found video file: $video_file"
+ cd "examples/$seq_name"
+ mkdir -p imgs
+ ffmpeg -i input.mp4 -vf fps=10 imgs/frame_%04d.png -y
+ echo "Extracted frames from $video_file to imgs/"
+ cd ../../
+ else
+ echo "No video file found: $video_file"
+ fi
+ fi
+done
+
+cd animation
+
+# save flow
+echo "Processing sequences with save_flow.py..."
+for seq_dir in ../examples/*/; do
+ if [ -d "$seq_dir" ]; then
+ seq_name=$(basename "$seq_dir")
+ echo "Processing sequence: $seq_name"
+ python utils/save_flow.py --input_path ../examples --seq_name "$seq_name"
+ fi
+done
+
+# animation
+echo "Running optimization for each sequence..."
+mkdir -p ../results/animation
+
+python optimization.py --save_path ../results/animation --iter 200 --input_path ../examples --img_size 960 \
+ --seq_name 'spiderman' --save_name 'spiderman_demo'
+
+python optimization.py --save_path ../results/animation --iter 200 --input_path ../examples --img_size 960 \
+ --seq_name 'deer' --save_name 'deer_demo' --smooth_weight 1 --main_renderer front_left --additional_renderer "right,front_right,back_right"
+
+echo "Animation completed."
+
+cd ..
+echo "Puppeteer pipeline completed successfully!"
\ No newline at end of file
diff --git a/third_party/Puppeteer/demo_rigging.sh b/third_party/Puppeteer/demo_rigging.sh
new file mode 100644
index 0000000000000000000000000000000000000000..dadc9b0c883dd996b6716e80b7c49d054d81cbcd
--- /dev/null
+++ b/third_party/Puppeteer/demo_rigging.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# Robust Puppeteer rigging pipeline
+# - stop on errors
+# - safe path checks
+# - always copy artifacts into /data/results
+
+set -euo pipefail
+
+echo "[INFO] Starting Puppeteer rigging pipeline..."
+
+# ---------- env / paths ----------
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# assumes the repo layout is /app/Puppeteer/{skeleton,skinning,...}
+ROOT_DIR="$(dirname "$SCRIPT_DIR")" # /app/Puppeteer
+TMP_DIR="/tmp/puppeteer_run"
+WORK_DIR="${TMP_DIR}" # unified working directory
+IN_EXAMPLES="${ROOT_DIR}/examples"
+OUT_ROOT="${WORK_DIR}/results"
+OUT_SKEL="${OUT_ROOT}/skel_results"
+OUT_SKEL_FOR_SKIN="${OUT_ROOT}/skeletons"
+OUT_SKIN="${OUT_ROOT}/skin_results"
+OUT_FINAL="${OUT_ROOT}/final_rigging"
+RESULT_DIR="${RESULT_DIR:-/data/results}" # same environment variable as app.py
+
+mkdir -p "$WORK_DIR" "$OUT_ROOT" "$OUT_SKEL_FOR_SKIN" "$OUT_FINAL" "$RESULT_DIR"
+
+# extend the Python path (third_party / third_partys compatibility)
+export PYTHONPATH="/app:/app/Puppeteer:/app/Puppeteer/third_party:${PYTHONPATH:-}"
+[ -d /app/third_partys ] || ln -s /app/Puppeteer/third_party /app/third_partys 2>/dev/null || true
+[ -f /app/Puppeteer/third_party/__init__.py ] || touch /app/Puppeteer/third_party/__init__.py
+
+# ---------- skeleton ----------
+echo "[INFO] Running skeleton generation..."
+cd "${ROOT_DIR}/skeleton"
+
+python demo.py \
+ --input_dir "${IN_EXAMPLES}" \
+ --pretrained_weights skeleton_ckpts/puppeteer_skeleton_w_diverse_pose.pth \
+ --output_dir "${OUT_ROOT}" \
+ --save_name skel_results \
+ --input_pc_num 8192 \
+ --save_render \
+ --apply_marching_cubes \
+ --joint_token \
+ --seq_shuffle
+
+echo "[INFO] Skeleton generation completed."
+
+# copy skeleton outputs into the skinning input folder (with existence check)
+echo "[INFO] Preparing skeletons for skinning..."
+if [ -d "${OUT_SKEL}" ]; then
+ mkdir -p "${OUT_SKEL_FOR_SKIN}"
+ shopt -s nullglob
+ for f in "${OUT_SKEL}"/*_pred.txt; do
+ cp -f "$f" "${OUT_SKEL_FOR_SKIN}/$(basename "${f/_pred.txt/.txt}")"
+ done
+ shopt -u nullglob
+else
+ echo "[ERR] ${OUT_SKEL} not found (skeleton step failed?)"
+ exit 1
+fi
+echo "[INFO] Copied rig files to ${OUT_SKEL_FOR_SKIN}"
+
+# ---------- skinning ----------
+echo "[INFO] Running skinning..."
+cd "${ROOT_DIR}/skinning"
+
+# a Space usually exposes only GPU 0 via CUDA_VISIBLE_DEVICES
+CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}" \
+torchrun --nproc_per_node=1 --master_port=10009 \
+ main.py \
+ --num_workers 1 \
+ --batch_size 1 \
+ --generate \
+ --save_skin_npy \
+ --pretrained_weights skinning_ckpts/puppeteer_skin_w_diverse_pose_depth1.pth \
+ --input_skel_folder "${OUT_SKEL_FOR_SKIN}" \
+ --mesh_folder "${IN_EXAMPLES}" \
+ --post_filter \
+ --depth 1 \
+ --save_folder "${OUT_SKIN}"
+
+echo "[INFO] Skinning completed."
+
+# ---------- collect artifacts ----------
+echo "[INFO] Collecting final artifacts..."
+mkdir -p "${OUT_FINAL}"
+
+# preferred artifact: output/rigged.glb (copy first if present)
+if [ -f "${WORK_DIR}/output/rigged.glb" ]; then
+ cp -f "${WORK_DIR}/output/rigged.glb" "${OUT_FINAL}/rigged.glb"
+fi
+
+# also collect skinning results (.glb) if present
+if [ -d "${OUT_SKIN}/generate" ]; then
+ shopt -s nullglob
+ cp -f "${OUT_SKIN}/generate/"*.glb "${OUT_FINAL}/" 2>/dev/null || true
+ shopt -u nullglob
+fi
+
+# ---------- export to /data/results ----------
+echo "[INFO] Exporting to ${RESULT_DIR} ..."
+mkdir -p "${RESULT_DIR}"
+shopt -s nullglob
+cp -f "${OUT_FINAL}/"*.glb "${RESULT_DIR}/" 2>/dev/null || true
+cp -f "${OUT_FINAL}/"*.gltf "${RESULT_DIR}/" 2>/dev/null || true
+shopt -u nullglob
+
+# validate results: at least one artifact must exist for success
+if compgen -G "${RESULT_DIR}/*.glb" > /dev/null || compgen -G "${RESULT_DIR}/*.gltf" > /dev/null ; then
+ echo "[OK] Artifacts saved to ${RESULT_DIR}"
+else
+ echo "[ERR] No .glb/.gltf produced. Check skeleton/skinning logs."
+ exit 2
+fi
+
+echo "[INFO] Pipeline finished successfully."
diff --git a/third_party/Puppeteer/requirements.txt b/third_party/Puppeteer/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3e4e9078ae364d9b8b916ca2b19d691ab081b06d
--- /dev/null
+++ b/third_party/Puppeteer/requirements.txt
@@ -0,0 +1,29 @@
+trimesh==4.2.3
+accelerate==0.28.0
+mesh2sdf==1.1.0
+transformers==4.46.1
+numpy==1.26.4
+pyrender==0.1.45
+tqdm
+opencv-python==4.9.0.80
+omegaconf==2.3.0
+einops==0.7.0
+timm
+lightning==2.2
+boto3
+cython==0.29.36
+tetgen==0.5.2
+loguru
+pytz
+h5py
+plyfile
+pymeshlab
+yacs
+fvcore
+easydict
+libigl==2.5.1
+scikit-learn
+jsonargparse
+ptlflow
+imageio-ffmpeg==0.4.7
+xformers==0.0.23
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/README.md b/third_party/Puppeteer/skeleton/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4c258b3e4f9bcfd69a55a2d2dda9bcef386178da
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/README.md
@@ -0,0 +1,93 @@
+# Auto-regressive Skeleton Generation
+This folder provides the skeleton generation implementation and scripts to evaluate the paper’s metrics on three test sets. You can also run inference on your own 3D objects.
+
+## Weights Download
+First download [checkpoints of Michelangelo](https://huggingface.co/Maikou/Michelangelo/tree/main/checkpoints/aligned_shape_latents) and our [released weights](https://huggingface.co/Seed3D/Puppeteer) for skeleton generation:
+
+```
+python download.py
+```
+
+## Evaluation
+
+To reproduce our evaluations, run the following command on `Articulation-XL2.0-test`, `ModelsResource-test` and `Diverse-pose-test`. `Articulation-XL2.0-test` and `Diverse-pose-test` are available [here](https://huggingface.co/datasets/chaoyue7/Articulation-XL2.0). For your convenience, we also save `ModelsResource-test` in our format (download it [here](https://drive.google.com/file/d/12U2ZuZWcKCQRI3IheBbG6I9-jfpG4KF5/view?usp=sharing)). The inference process requires 4.6 GB of VRAM and takes 1–2 seconds per inference.
+
+```
+bash eval.sh
+```
+You can change `save_name` for different evaluation and check the quantitative results afterwards in `evaluate_results.txt`. The pipeline saves mesh and skeleton as `.obj` files; pass `--save_render` to additionally generate rendered previews of the mesh and skeleton.
+
+These are the numbers (the metrics are in units of 10^-2) that you should be able to reproduce using the released weights and the current version of the codebase.
+
+| Training data | Articulation-XL2.0-test<br>CD-J2J / CD-J2B / CD-B2B | ModelsResource-test<br>CD-J2J / CD-J2B / CD-B2B | Diverse-pose-test<br>CD-J2J / CD-J2B / CD-B2B |
+| --- | --- | --- | --- |
+| train on Arti-XL2.0 w/o diverse-pose subset | 3.062 / 2.342 / 1.963 | 3.843 / 2.876 / 2.465 | 3.276 / 2.597 / 2.074 |
+| train on Arti-XL2.0 w/ diverse-pose subset | 3.047 / 2.337 / 1.952 | 3.785 / 2.847 / 2.430 | 2.483 / 1.922 / 1.600 |
+
+Note: If your results differ from the numbers reported in the table above (e.g., 3.78 --> ~3.90 for CD-J2J on ModelsResource), check your version of `transformers`, which may trigger the following warnings:
+```
+Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in SkeletonOPTModel is torch.float32. You should run training or inference using Automatic Mixed-Precision via the with torch.autocast(device_type='torch_device'): decorator, or load the model with the torch_dtype argument. Example: model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)
+Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in SkeletonOPTDecoder is torch.float32. You should run training or inference using Automatic Mixed-Precision via the with torch.autocast(device_type='torch_device'): decorator, or load the model with the torch_dtype argument. Example: model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)
+```
+These results were obtained using `CUDA 11.8`. We observed that switching to CUDA 12.1 or other versions, while keeping all package versions identical, resulted in slight numerical variations.
+
+## Demo
+We provide some examples (download [here](https://drive.google.com/file/d/1bjtA3JSqW-t0YoSd2vOZy3iKvuOMLIrm/view?usp=sharing)) to test our models by running the following command. You can also test our models on your own 3D objects; remember to change the `input_dir`.
+```
+bash demo.sh
+```
+
+Input mesh quality directly affects model performance, since the pre-trained shape encoder was trained on high-quality meshes. To check your data, you can test reconstruction using the shape latents extracted from the shape encoder. The example below shows results from an input mesh with a coarse surface.
+
+
+
+
+
+## Visualization
+We use MeshLab for skeleton visualization in the paper. The skeleton can be saved using `save_skeleton_obj` in `utils/save_utils.py`. Bones are represented as blue cones oriented from the parent joint to the child joint, joints as red spheres, and the root joint as a green sphere. Example results are shown below.
+
+
+
+
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/data_utils/README.md b/third_party/Puppeteer/skeleton/data_utils/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5dfc4a1e3d98fd27439606ba26ba087c44debbf8
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/data_utils/README.md
@@ -0,0 +1,43 @@
+## Preprocessed data
+We provide preprocessed data saved in NPZ files, which contain the following fields:
+```
+'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'pc_w_norm', 'joint_names', 'skinning_weights_value', 'skinning_weights_rows', 'skinning_weights_cols', 'skinning_weights_shape'
+```
+You can check `read_npz.py` for how to read the NPZ files and `save_npz.py` for how we save them.
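+
+For example, the skinning weights are stored in sparse COO form; a rough sketch for rebuilding the dense matrix (key names follow the list above; the file name is illustrative) is:
+```
+import numpy as np
+import scipy.sparse as sp
+
+entry = np.load('articulation_xlv2_test.npz', allow_pickle=True)['arr_0'][0]
+skin = sp.coo_matrix(
+    (entry['skinning_weights_value'],
+     (entry['skinning_weights_rows'], entry['skinning_weights_cols'])),
+    shape=entry['skinning_weights_shape']
+).toarray()  # (n_vertex, n_joints)
+```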
+
+Before saving them into NPZ files, we extract the mesh (.obj) and rig (.txt) from 3D models downloaded from Objaverse-XL using Blender. The rig file follows the format used in [RigNet](https://github.com/zhan-xu/RigNet), which includes the following entries:
+```
+joints [joint_name] [x] [y] [z]
+root [root_joint_name]
+skin [vertex_index] [joints_name1] [skinning_weight1] [joints_name2] [skinning_weight2] ...
+hier [parent_joint_name] [child_joint_name]
+```
+For an example, please see `examples/0a59c5ffa4a1476bac6d540b79947f31.txt`.
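+
+A minimal, hypothetical rig with two joints would look like:
+```
+joints root_joint 0.0 0.0 0.0
+joints child_joint 0.0 0.5 0.0
+root root_joint
+skin 0 root_joint 0.7 child_joint 0.3
+hier root_joint child_joint
+```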
+
+If you want to convert an NPZ file back to OBJ and TXT files, we provide an example; run:
+```
+python convert_npz_to_mesh_rig.py
+```
+
+## Visualization
+We provide a method for visualizing 3D models with skeletons using [Pyrender](https://github.com/mmatl/pyrender), modified from [Lab4D](https://github.com/lab4d-org/lab4d/tree/ppr/). This visualization also serves as input to the VLM for skeleton quality rating. Make sure the following packages are installed before running the visualization:
+```
+pip install trimesh opencv-python pyrender
+```
+
+We provide an example to demonstrate the process. For this example, we prepare an OBJ file along with a TXT file containing rigging information. Then, run:
+```
+python render_data.py
+```
+You will obtain the following outputs:
+
+
+
+
+
+### Reading rig and mesh from GLBs
+We provide the script we use for reading rig (.txt) and mesh (.obj) from glb files. You can run:
+```
+python read_rig_mesh_from_glb.py
+```
+Remember to download Blender (we use 4.2.0) and install bpy in your conda environment.
diff --git a/third_party/Puppeteer/skeleton/data_utils/convert_npz_to_mesh_rig.py b/third_party/Puppeteer/skeleton/data_utils/convert_npz_to_mesh_rig.py
new file mode 100644
index 0000000000000000000000000000000000000000..5122bb75f6d6118a28afec4a5e745d0569f19714
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/data_utils/convert_npz_to_mesh_rig.py
@@ -0,0 +1,107 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+You can convert npz file back to obj(mesh) and txt(rig) files using this python script.
+"""
+import os
+import numpy as np
+import scipy.sparse as sp
+
+def export_obj(vertices, faces, normals, output_path):
+ with open(output_path, 'w') as f:
+ for v in vertices:
+ f.write(f"v {v[0]} {v[1]} {v[2]}\n")
+ for n in normals:
+ f.write(f"vn {n[0]} {n[1]} {n[2]}\n")
+ for i, face in enumerate(faces):
+ # OBJ format is 1-based, so we add 1 to all indices
+ f.write(f"f {face[0]+1}//{face[0]+1} {face[1]+1}//{face[1]+1} {face[2]+1}//{face[2]+1}\n")
+
+def export_rig_txt(joints, bones, root_index, joint_names, skinning_weights, output_path):
+ """
+ joints [joint_name] [x] [y] [z]
+ root [root_joint_name]
+ skin [vertex_index] [joint_name1] [weight1] [joint_name2] [weight2] ...
+ hier [parent_joint_name] [child_joint_name]
+ """
+ n_joints = len(joints)
+ n_verts = skinning_weights.shape[0] # (n_vertex, n_joints)
+
+ with open(output_path, 'w') as f:
+ # 1) joints
+ for i in range(n_joints):
+ x, y, z = joints[i]
+ jn = joint_names[i]
+ f.write(f"joints {jn} {x} {y} {z}\n")
+
+ # 2) root
+ root_name = joint_names[root_index]
+ f.write(f"root {root_name}\n")
+
+ # 3) skin
+ for vidx in range(n_verts):
+ row_weights = skinning_weights[vidx]
+ non_zero_indices = np.where(row_weights != 0)[0]
+ if len(non_zero_indices) == 0:
+ continue
+
+ line_parts = [f"skin {vidx}"] # vertex_idx
+ for jidx in non_zero_indices:
+ w = row_weights[jidx]
+ jn = joint_names[jidx]
+ line_parts.append(jn)
+ line_parts.append(str(w))
+
+ f.write(" ".join(line_parts) + "\n")
+
+ # 4) hier
+ for p_idx, c_idx in bones:
+ p_name = joint_names[p_idx]
+ c_name = joint_names[c_idx]
+ f.write(f"hier {p_name} {c_name}\n")
+
+if __name__ == "__main__":
+
+ data = np.load('articulation_xlv2_test.npz', allow_pickle=True)
+ data_list = data['arr_0']
+
+ print(f"Loaded {len(data_list)} data entries")
+
+ model_data = data_list[0]
+ print("Data keys:", model_data.keys())
+ # 'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'joint_names',
+ # 'skinning_weights_value', 'skinning_weights_row', 'skinning_weights_col', 'skinning_weights_shape'
+
+ vertices = model_data['vertices'] # (n_vertex, 3)
+ faces = model_data['faces'] # (n_faces, 3)
+ normals = model_data['normals'] # (n_vertex, 3)
+ joints = model_data['joints'] # (n_joints, 3)
+ bones = model_data['bones'] # (n_bones, 2)
+ root_index = model_data['root_index'] # int
+ joint_names = model_data['joint_names'] # list of str
+ uuid_str = model_data['uuid']
+
+ skin_val = model_data['skinning_weights_value']
+ skin_row = model_data['skinning_weights_row']
+ skin_col = model_data['skinning_weights_col']
+ skin_shape = model_data['skinning_weights_shape']
+ skin_sparse = sp.coo_matrix((skin_val, (skin_row, skin_col)), shape=skin_shape)
+ skinning_weights = skin_sparse.toarray() # (n_vertex, n_joints)
+
+ obj_path = f"{uuid_str}.obj"
+ export_obj(vertices, faces, normals, obj_path)
+ rig_txt_path = f"{uuid_str}.txt"
+ export_rig_txt(joints, bones, root_index, joint_names, skinning_weights, rig_txt_path)
+
+ print("Done!")
diff --git a/third_party/Puppeteer/skeleton/data_utils/data_loader.py b/third_party/Puppeteer/skeleton/data_utils/data_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..b29ab6108377fcf2d82139ededc7c59c4aeaed5e
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/data_utils/data_loader.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import glob
+import numpy as np
+import trimesh
+
+class DataLoader:
+ def __init__(self):
+ self.joint_name_to_idx = {}
+
+ def load_rig_data(self, rig_path):
+ joints = []
+ joints_names = []
+        bones = []
+        self.root_name = None
+
+        with open(rig_path, 'r') as f:
+            for line in f:
+                parts = line.strip().split()
+                if not parts:
+                    continue
+                if parts[0] == 'joints':
+ joint_name = parts[1]
+ joint_pos = [float(parts[2]), float(parts[3]), float(parts[4])]
+ self.joint_name_to_idx[joint_name] = len(joints)
+ joints.append(joint_pos)
+ joints_names.append(joint_name)
+ elif parts[0] == 'root':
+ self.root_name = parts[1]
+ elif parts[0] == 'hier':
+ parent_joint = self.joint_name_to_idx[parts[1]]
+ child_joint = self.joint_name_to_idx[parts[2]]
+ bones.append([parent_joint, child_joint])
+
+ self.joints = np.array(joints)
+ self.bones = np.array(bones)
+ self.joints_names = joints_names
+ self.root_idx = None
+ if self.root_name is not None:
+ self.root_idx = self.joint_name_to_idx[self.root_name]
+
+ def load_mesh(self, mesh_path):
+ mesh = trimesh.load(mesh_path, process=False)
+ mesh.visual.vertex_colors[:, 3] = 100 # set transparency
+ self.mesh = mesh
+
+ # Compute the centroid normal of the mesh
+ v = self.mesh.vertices
+ xmin, ymin, zmin = v.min(axis=0)
+ xmax, ymax, zmax = v.max(axis=0)
+ self.bbox_center = np.array([(xmax + xmin)/2, (ymax + ymin)/2, (zmax + zmin)/2])
+ self.bbox_size = np.array([xmax - xmin, ymax - ymin, zmax - zmin])
+ self.bbox_scale = max(xmax - xmin, ymax - ymin, zmax - zmin)
+
+ normal = mesh.center_mass - self.bbox_center
+ normal = normal / (np.linalg.norm(normal)+1e-5)
+
+        # Choose axis order based on the dominant normal direction
+        if abs(normal[1]) > abs(normal[2]):  # Y component dominant
+            self.axis_order = [0, 1, 2]  # keep default order
+        else:
+            self.axis_order = [0, 2, 1]  # swap Y and Z
+
+ self.mesh.vertices = self.mesh.vertices[:, self.axis_order]
+ self.joints = self.joints[:, self.axis_order]
+ self.normalize_coordinates()
+
+ def normalize_coordinates(self):
+
+ # Compute scale and offset
+ scale = 1.0 / (self.bbox_scale+1e-5)
+ offset = -self.bbox_center
+
+ self.mesh.vertices = (self.mesh.vertices + offset) * scale
+ self.joints = (self.joints + offset) * scale
+
+        # Fixed radii (in normalized coordinates) for joint spheres and bone cylinders
+        self.joint_radius = 0.01
+        self.bone_radius = 0.005
+
+ def query_mesh_rig(self):
+
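+ # Build trimesh geometry for rendering: the (transparent) mesh, joint spheres
+ # (root in green, the rest in blue) and bone cylinders (red).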
+ input_dict = {"shape": self.mesh}
+
+ # Create joints as spheres
+ joint_meshes = []
+ for i, joint in enumerate(self.joints):
+
+ sphere = trimesh.creation.icosphere(
+ radius=self.joint_radius, subdivisions=2
+ )
+ sphere.apply_translation(joint)
+ if i == self.root_idx:
+ # root green
+ sphere.visual.vertex_colors = [0, 255, 0, 255]
+ else:
+ sphere.visual.vertex_colors = [0, 0, 255, 255]
+
+ joint_meshes.append(sphere)
+ input_dict["joint_meshes"] = trimesh.util.concatenate(joint_meshes)
+
+ # Create bones as cylinders
+ bone_meshes = []
+ for bone in self.bones:
+ start, end = self.joints[bone[0]], self.joints[bone[1]]
+ cylinder = trimesh.creation.cylinder(radius=self.bone_radius, segment=np.array([[0, 0, 0], end - start]))
+ cylinder.apply_translation(start)
+ cylinder.visual.vertex_colors = [255, 0, 0, 255] # red
+ bone_meshes.append(cylinder)
+ input_dict["bone_meshes"] = trimesh.util.concatenate(bone_meshes)
+
+ return input_dict
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/data_utils/pyrender_wrapper.py b/third_party/Puppeteer/skeleton/data_utils/pyrender_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..5354edd06a3054fe7ca26237209834be4f15551d
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/data_utils/pyrender_wrapper.py
@@ -0,0 +1,144 @@
+# Copyright (c) 2023 Gengshan Yang
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: MIT
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under MIT, with the full license text
+# available at https://github.com/lab4d-org/lab4d/blob/main/LICENSE.
+#
+# This modified file is released under the same license.
+
+import os
+import numpy as np
+import cv2
+import pyrender
+import trimesh
+from pyrender import (
+ IntrinsicsCamera,
+ Mesh,
+ Node,
+ Scene,
+ OffscreenRenderer,
+ MetallicRoughnessMaterial,
+ RenderFlags
+)
+
+os.environ["PYOPENGL_PLATFORM"] = "egl"
+
+def look_at(eye, center, up):
+ """Create a look-at (view) matrix."""
+ f = np.array(center, dtype=np.float32) - np.array(eye, dtype=np.float32)
+ f /= np.linalg.norm(f)
+
+ u = np.array(up, dtype=np.float32)
+ u /= np.linalg.norm(u)
+
+ s = np.cross(f, u)
+ u = np.cross(s, f)
+
+ m = np.identity(4, dtype=np.float32)
+ m[0, :3] = s
+ m[1, :3] = u
+ m[2, :3] = -f
+ m[:3, 3] = -np.matmul(m[:3, :3], np.array(eye, dtype=np.float32))
+
+ return m
+
+class PyRenderWrapper:
+ def __init__(self, image_size=(1024, 1024)) -> None:
+ # renderer
+ self.image_size = image_size
+ render_size = max(image_size)
+ self.r = OffscreenRenderer(render_size, render_size)
+ self.intrinsics = IntrinsicsCamera(
+ render_size, render_size, render_size / 2, render_size / 2
+ )
+ # light
+ self.light_pose = np.eye(4)
+ self.set_light_topdown()
+ self.direc_l = pyrender.DirectionalLight(color=np.ones(3), intensity=5.0)
+ self.material = MetallicRoughnessMaterial(
+ roughnessFactor=0.75, metallicFactor=0.75, alphaMode="BLEND"
+ )
+ self.init_camera()
+
+ def init_camera(self):
+ self.flip_pose = np.eye(4)
+ self.set_camera(np.eye(4))
+
+ def set_camera(self, scene_to_cam):
+ # object to camera transforms
+ self.scene_to_cam = self.flip_pose @ scene_to_cam
+
+ def set_light_topdown(self, gl=False):
+ # top down light, slightly closer to the camera
+ if gl:
+ rot = cv2.Rodrigues(np.asarray([-np.pi / 2, 0, 0]))[0]
+ else:
+ rot = cv2.Rodrigues(np.asarray([np.pi / 2, 0, 0]))[0]
+ self.light_pose[:3, :3] = rot
+
+ def align_light_to_camera(self):
+ self.light_pose = np.linalg.inv(self.scene_to_cam)
+
+ def set_intrinsics(self, intrinsics):
+ """
+ Args:
+ intrinsics: (4,) fx,fy,px,py
+ """
+ self.intrinsics = IntrinsicsCamera(
+ intrinsics[0], intrinsics[1], intrinsics[2], intrinsics[3]
+ )
+
+ def get_cam_to_scene(self):
+ cam_to_scene = np.eye(4)
+ cam_to_scene[:3, :3] = self.scene_to_cam[:3, :3].T
+ cam_to_scene[:3, 3] = -self.scene_to_cam[:3, :3].T @ self.scene_to_cam[:3, 3]
+ return cam_to_scene
+
+ def set_camera_view(self, angle, bbox_center, distance=2.0):
+ # Calculate camera position based on angle and distance from bounding box center
+ camera_position = bbox_center + distance * np.array([np.sin(angle), 0, np.cos(angle)], dtype=np.float32)
+ look_at_matrix = look_at(camera_position, bbox_center, [0, 1, 0])
+ self.scene_to_cam = look_at_matrix @ self.flip_pose
+
+ def render(self, input_dict):
+ # Create separate scenes for transparent objects (mesh) and solid objects (joints and bones)
+ scene_transparent = Scene(ambient_light=np.array([1.0, 1.0, 1.0, 1.0]) * 0.1)
+ scene_solid = Scene(ambient_light=np.array([1.0, 1.0, 1.0, 1.0]) * 0.1)
+
+ mesh_pyrender = Mesh.from_trimesh(input_dict["shape"], smooth=False)
+ mesh_pyrender.primitives[0].material = self.material
+ scene_transparent.add(mesh_pyrender, pose=np.eye(4), name="shape")
+
+ if "joint_meshes" in input_dict:
+ joints_pyrender = Mesh.from_trimesh(input_dict["joint_meshes"], smooth=False)
+ joints_pyrender.primitives[0].material = self.material
+ scene_solid.add(joints_pyrender, pose=np.eye(4), name="joints")
+
+ if "bone_meshes" in input_dict:
+ bones_pyrender = Mesh.from_trimesh(input_dict["bone_meshes"], smooth=False)
+ bones_pyrender.primitives[0].material = self.material
+ scene_solid.add(bones_pyrender, pose=np.eye(4), name="bones")
+
+ # Camera for both scenes
+ scene_transparent.add(self.intrinsics, pose=self.get_cam_to_scene())
+ scene_solid.add(self.intrinsics, pose=self.get_cam_to_scene())
+
+ # Light for both scenes
+ scene_transparent.add(self.direc_l, pose=self.light_pose)
+ scene_solid.add(self.direc_l, pose=self.light_pose)
+
+ # Render transparent scene first
+ color_transparent, depth_transparent = self.r.render(scene_transparent)
+
+ # Render solid scene on top
+ color_solid, depth_solid = self.r.render(scene_solid)
+
+ # Combine the two scenes
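+ # Pixels where the solid render has zero depth (no joint/bone geometry) keep the transparent mesh render.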
+ color_combined = np.where(depth_solid[..., np.newaxis] == 0, color_transparent, color_solid)
+
+ return color_combined, depth_solid
+
+ def delete(self):
+ self.r.delete()
diff --git a/third_party/Puppeteer/skeleton/data_utils/read_npz.py b/third_party/Puppeteer/skeleton/data_utils/read_npz.py
new file mode 100644
index 0000000000000000000000000000000000000000..b10023b1586a98124bf9d260f843566e4f57a360
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/data_utils/read_npz.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import scipy.sparse as sp
+
+# Load the NPZ file
+data = np.load('articulation_xlv2_test.npz', allow_pickle=True)
+data_list = data['arr_0']
+
+print(f"Loaded {len(data_list)} data entries")
+print(f"Data keys: {data_list[0].keys()}")
+# 'vertices', 'faces', 'normals', 'joints', 'bones', 'root_index', 'uuid', 'pc_w_norm', 'joint_names', 'skinning_weights_value',
+# 'skinning_weights_row', 'skinning_weights_col', 'skinning_weights_shape'
+
+data = data_list[0] # check the first data
+
+vertices = data['vertices'] # (n_vertex, 3)
+faces = data['faces'] # (n_faces, 3)
+normals = data['normals'] # (n_vertex, 3)
+joints = data['joints'] # (n_joints, 3)
+bones = data['bones'] # (n_bones, 2)
+pc_w_norm = data['pc_w_norm'] # (8192, 6)
+
+# Extract the sparse skinning weights components
+skinning_data = data['skinning_weights_value']
+skinning_rows = data['skinning_weights_row']
+skinning_cols = data['skinning_weights_col']
+skinning_shape = data['skinning_weights_shape']
+
+skinning_sparse = sp.coo_matrix((skinning_data, (skinning_rows, skinning_cols)), shape=skinning_shape)
+skinning_weights = skinning_sparse.toarray() # (n_vertex, n_joints)
+
diff --git a/third_party/Puppeteer/skeleton/data_utils/read_rig_mesh_from_glb.py b/third_party/Puppeteer/skeleton/data_utils/read_rig_mesh_from_glb.py
new file mode 100644
index 0000000000000000000000000000000000000000..51b1fadbb4aab42c6b6395a0f41c64d2cb2fbeb2
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/data_utils/read_rig_mesh_from_glb.py
@@ -0,0 +1,198 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Blender script for extracting a rig (.txt) and a mesh (.obj) from GLB files.
+It currently supports GLB files only, but it can be modified to load other formats (e.g., FBX, DAE) with minimal changes.
+"""
+
+import bpy
+import os
+import re
+import json
+import pickle
+
+def get_hierarchy_root_joint(joint):
+ """
+ Function to find the top parent joint node from the given
+ 'joint' Blender node (armature bone).
+ """
+ root_joint = joint
+ while root_joint.parent is not None:
+ root_joint = root_joint.parent
+ return root_joint
+
+def get_meshes_and_armatures():
+ """
+ Function to get all meshes and armatures in the scene
+ """
+ default_objects = ['Cube', 'Light', 'Camera', 'Icosphere']
+ for obj_name in default_objects:
+ if obj_name in bpy.data.objects:
+ bpy.data.objects.remove(bpy.data.objects[obj_name], do_unlink=True)
+
+ meshes = [obj for obj in bpy.context.scene.objects if obj.type == 'MESH']
+ armatures = [obj for obj in bpy.context.scene.objects if obj.type == 'ARMATURE']
+ return meshes, armatures
+
+def get_joint_dict(root):
+ """
+ Function to create a dictionary of joints from the root joint
+ """
+ joint_pos = {}
+ def traverse_bone(bone):
+ joint_pos[bone.name] = {
+ 'pos': bone.head_local,
+ 'pa': bone.parent.name if bone.parent else 'None',
+ 'ch': [child.name for child in bone.children]
+ }
+ for child in bone.children:
+ traverse_bone(child)
+
+ traverse_bone(root)
+ return joint_pos
+
+def record_info(root, joint_dict, meshes, mesh_vert_offsets, file_info):
+ """
+ Write the joints, root, skinning weights and hierarchy to the rig .txt file.
+ - root: root joint (armature bone)
+ - joint_dict: per-joint position / parent / children info
+ - meshes: list of Blender mesh objects
+ - mesh_vert_offsets: per-mesh vertex index offsets (for multi-geometry assets)
+ - file_info: open handle of the output rig .txt file
+ """
+ skin_records = {}
+
+ def replace_special_characters(name):
+ return re.sub(r'\W+', '_', name)
+
+ for key, val in joint_dict.items():
+ modified_key = replace_special_characters(key)
+ file_info.write(f'joints {modified_key} {val["pos"][0]:.8f} {val["pos"][1]:.8f} {val["pos"][2]:.8f}\n')
+ file_info.write(f'root {replace_special_characters(root.name)}\n')
+
+ for mesh_index, mesh in enumerate(meshes):
+ vert_offset = mesh_vert_offsets[mesh_index]
+ if mesh.type == 'MESH':
+ for vtx in mesh.data.vertices:
+ weights = {}
+ for group in vtx.groups:
+ bone_name = replace_special_characters(mesh.vertex_groups[group.group].name)
+ weights[bone_name] = group.weight
+
+ global_vertex_index = vert_offset + vtx.index
+
+ skin_record = f"skin {global_vertex_index} " + " ".join(f"{bone} {weight:.4f}" for bone, weight in weights.items())
+
+ if global_vertex_index not in skin_records:
+ skin_records[global_vertex_index] = skin_record
+ file_info.write(skin_record + "\n")
+
+ for key, val in joint_dict.items():
+ if val['pa'] != 'None':
+ parent_name = replace_special_characters(val['pa'])
+ child_name = replace_special_characters(key)
+ file_info.write(f'hier {parent_name} {child_name}\n')
+
+
+def record_obj(meshes, file_obj):
+ vert_offset = 0
+ norm_offset = 0
+ mesh_vert_offsets = []
+
+ for mesh in meshes:
+ mesh_vert_offsets.append(vert_offset)
+ bpy.context.view_layer.objects.active = mesh
+ bpy.ops.object.mode_set(mode='OBJECT')
+
+ # vertex
+ for v in mesh.data.vertices:
+ file_obj.write(f"v {v.co[0]} {v.co[1]} {v.co[2]}\n")
+ file_obj.write("\n")
+
+ # normal
+ for vn in mesh.data.vertices:
+ normal = vn.normal
+ file_obj.write(f"vn {normal[0]} {normal[1]} {normal[2]}\n")
+ file_obj.write("\n")
+
+ # face
+ for poly in mesh.data.polygons:
+ verts = [v + 1 + vert_offset for v in poly.vertices]
+ file_obj.write(f"f {verts[0]}//{verts[0]} {verts[1]}//{verts[1]} {verts[2]}//{verts[2]}\n")
+
+ vert_count = len(mesh.data.vertices)
+ vert_offset += vert_count
+ norm_offset += vert_count
+
+ return mesh_vert_offsets
+
+def process_glb(glb_path, rigs_dir, meshes_dir):
+ base_name = os.path.splitext(os.path.basename(glb_path))[0]
+
+ obj_name = os.path.join(meshes_dir, f'{base_name}.obj')
+ info_name = os.path.join(rigs_dir, f'{base_name}.txt')
+
+ # Skip processing if rig info file already exists
+ if os.path.exists(info_name):
+ print(f"{info_name} already exists. Skipping...")
+ return
+
+ if os.path.exists(obj_name):
+ print(f"{obj_name} already exists. Skipping...")
+ return
+
+ bpy.ops.wm.read_factory_settings(use_empty=True)
+ bpy.ops.import_scene.gltf(filepath=glb_path)
+
+ meshes, armatures = get_meshes_and_armatures()
+
+ if not armatures:
+ print(f"No armatures found in {glb_path}. Skipping...")
+ return
+
+ root = armatures[0].data.bones[0]
+ root_name = get_hierarchy_root_joint(root)
+ joint_dict = get_joint_dict(root_name)
+
+ # save meshes
+ with open(obj_name, 'w') as file_obj:
+ mesh_vert_offsets = record_obj(meshes, file_obj)
+
+ # save rigs
+ with open(info_name, 'w') as file_info:
+ record_info(root_name, joint_dict, meshes, mesh_vert_offsets, file_info)
+
+ print(f"Processed {glb_path}")
+
+if __name__ == '__main__':
+
+ src_dir = 'glbs'
+ rigs_dir = 'rigs'
+ meshes_dir = 'meshes'
+ # Ensure the output directories exist
+ os.makedirs(rigs_dir, exist_ok=True)
+ os.makedirs(meshes_dir, exist_ok=True)
+
+ glb_paths = [os.path.join(src_dir, file) for file in os.listdir(src_dir) if file.endswith('.glb')]
+
+ print(f"Found {len(glb_paths)} GLB files in {src_dir}")
+
+ for glb_path in glb_paths:
+ try:
+ process_glb(glb_path, rigs_dir, meshes_dir)
+ except Exception as e:
+ with open('error.txt', 'a') as error_file:
+ error_file.write(f"{glb_path}: {str(e)}\n")
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/data_utils/render_data.py b/third_party/Puppeteer/skeleton/data_utils/render_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..4864cf53e03596782544f2167185beb724bb040e
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/data_utils/render_data.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+import cv2
+
+from pyrender_wrapper import PyRenderWrapper
+from data_loader import DataLoader
+
+def main():
+ loader = DataLoader()
+
+ raw_size = (960, 960)
+ renderer = PyRenderWrapper(raw_size)
+
+ output_dir = 'render_results'
+ os.makedirs(output_dir, exist_ok=True)
+
+ rig_path = 'examples/0a59c5ffa4a1476bac6d540b79947f31.txt'
+ mesh_path = rig_path.replace('.txt', '.obj')
+
+ filename = os.path.splitext(os.path.basename(rig_path))[0]
+
+ loader.load_rig_data(rig_path)
+ loader.load_mesh(mesh_path)
+ input_dict = loader.query_mesh_rig()
+
+ angles = [0, np.pi/2, np.pi, 3*np.pi/2]
+
+ bbox_center = loader.mesh.bounding_box.centroid
+ bbox_size = loader.mesh.bounding_box.extents
+ distance = np.max(bbox_size) * 2
+
+ subfolder_path = os.path.join(output_dir, filename)
+
+ os.makedirs(subfolder_path, exist_ok=True)
+
+ for i, angle in enumerate(angles):
+ print(f"Rendering view at {np.degrees(angle)} degrees")
+
+ renderer.set_camera_view(angle, bbox_center, distance)
+ renderer.align_light_to_camera()
+
+ color = renderer.render(input_dict)[0]
+
+ output_filename = f"{filename}_view{i+1}.png"
+ output_filepath = os.path.join(subfolder_path, output_filename)
+ cv2.imwrite(output_filepath, color)
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/data_utils/save_npz.py b/third_party/Puppeteer/skeleton/data_utils/save_npz.py
new file mode 100644
index 0000000000000000000000000000000000000000..78a84cc4e36f029d0723ec48a14cc746d54ced8e
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/data_utils/save_npz.py
@@ -0,0 +1,256 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This script shows how we process the meshes and rigs from the input folders and save them into a compressed npz file.
+"""
+import os
+import numpy as np
+import glob
+import pickle
+from concurrent.futures import ProcessPoolExecutor
+import skimage.measure
+import trimesh
+import mesh2sdf.core
+import scipy.sparse as sp
+
+def read_obj_file(file_path):
+ vertices = []
+ faces = []
+ normals = [] # Added normals list
+
+ with open(file_path, 'r') as file:
+ for line in file:
+ if line.startswith('v '):
+ parts = line.split()[1:]
+ vertices.append([float(parts[0]), float(parts[1]), float(parts[2])])
+ elif line.startswith('vn '): # Added reading normals
+ parts = line.split()[1:]
+ normals.append([float(parts[0]), float(parts[1]), float(parts[2])])
+ elif line.startswith('f '):
+ parts = line.split()[1:]
+ # OBJ format is 1-based, we need 0-based for npz
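+ # Faces are assumed to be in the 'f v//vn v//vn v//vn' form written by read_rig_mesh_from_glb.py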
+ face = [int(part.split('//')[0]) - 1 for part in parts]
+ faces.append(face)
+
+ return np.array(vertices), np.array(faces), np.array(normals)
+
+def read_rig_file(file_path):
+ """
+ Read rig from txt file, our format is the same as RigNet:
+ joints joint_name x y z
+ root root_joint_name
+ skin vertex_idx joint_name weight joint_name weight ...
+ hier parent_joint_name child_joint_name
+ """
+ joints = []
+ bones = []
+ joint_names = []
+
+ joint_mapping = {}
+ joint_index = 0
+
+ skinning_data = {} # Dictionary to store vertex index -> [(joint_idx, weight), ...]
+
+ with open(file_path, 'r') as file:
+ lines = file.readlines()
+
+ for line in lines:
+ parts = line.split()
+ if line.startswith('joints'):
+ name = parts[1]
+ position = [float(parts[2]), float(parts[3]), float(parts[4])]
+ joints.append(position)
+ joint_names.append(name)
+ joint_mapping[name] = joint_index
+ joint_index += 1
+ elif line.startswith('hier'):
+ parent_joint = joint_mapping[parts[1]]
+ child_joint = joint_mapping[parts[2]]
+ bones.append([parent_joint, child_joint])
+ elif line.startswith('root'):
+ root = joint_mapping[parts[1]]
+ elif line.startswith('skin'):
+ vertex_idx = int(parts[1])
+
+ if vertex_idx not in skinning_data:
+ skinning_data[vertex_idx] = []
+
+ for i in range(2, len(parts), 2):
+ if i+1 < len(parts):
+ joint_name = parts[i]
+ weight = float(parts[i+1])
+
+ if joint_name in joint_mapping:
+ joint_idx = joint_mapping[joint_name]
+ skinning_data[vertex_idx].append((joint_idx, weight))
+
+ return np.array(joints), np.array(bones), root, joint_names, skinning_data
+
+def convert_to_sparse_skinning(skinning_data, num_vertices, num_joints):
+ """Convert skinning weights to sparse matrix format."""
+ rows = []
+ cols = []
+ data = []
+
+ for vertex_idx, weights in skinning_data.items():
+ for joint_idx, weight in weights:
+ rows.append(vertex_idx)
+ cols.append(joint_idx)
+ data.append(weight)
+
+ sparse_skinning = sp.coo_matrix((data, (rows, cols)), shape=(num_vertices, num_joints))
+
+ # Return as tuple of arrays which can be serialized
+ return (sparse_skinning.data, sparse_skinning.row, sparse_skinning.col, sparse_skinning.shape)
+
+def normalize_to_unit_cube(vertices, normals=None, scale_factor=1.0):
+ min_coords = vertices.min(axis=0)
+ max_coords = vertices.max(axis=0)
+ center = (max_coords + min_coords) / 2.0
+
+ vertices -= center
+ scale = 1.0 / np.abs(vertices).max() * scale_factor
+ vertices *= scale
+
+ if normals is not None:
+ # Normalize each normal vector to unit length
+ norms = np.linalg.norm(normals, axis=1, keepdims=True)
+ normals = normals / (norms+1e-8)
+
+ return vertices, normals, center, scale
+ else:
+ return vertices, center, scale
+
+def normalize_vertices(vertices, scale=0.9):
+ bbmin, bbmax = vertices.min(0), vertices.max(0)
+ center = (bbmin + bbmax) * 0.5
+ scale = 2.0 * scale / (bbmax - bbmin).max()
+ vertices = (vertices - center) * scale
+ return vertices, center, scale
+
+def export_to_watertight(normalized_mesh, octree_depth: int = 7):
+ """
+ Convert the non-watertight mesh to watertight.
+
+ Args:
+ normalized_mesh (trimesh.Trimesh): input mesh, already normalized
+ octree_depth (int): SDF grid resolution is 2 ** octree_depth
+
+ Returns:
+ mesh (trimesh.Trimesh): watertight mesh extracted via marching cubes
+
+ """
+ size = 2 ** octree_depth
+ level = 2 / size
+
+ scaled_vertices, to_orig_center, to_orig_scale = normalize_vertices(normalized_mesh.vertices)
+
+ sdf = mesh2sdf.core.compute(scaled_vertices, normalized_mesh.faces, size=size)
+
+ vertices, faces, normals, _ = skimage.measure.marching_cubes(np.abs(sdf), level)
+
+ # watertight mesh
+ vertices = vertices / size * 2 - 1 # -1 to 1
+ vertices = vertices / to_orig_scale + to_orig_center
+ mesh = trimesh.Trimesh(vertices, faces, normals=normals)
+
+ return mesh
+
+def process_mesh_to_pc(mesh, marching_cubes = True, sample_num = 8192):
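+ # Optionally re-mesh to a watertight surface (SDF + marching cubes), then sample `sample_num`
+ # surface points with their face normals and pack them as an (N, 6) float16 array.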
+ if marching_cubes:
+ mesh = export_to_watertight(mesh)
+ return_mesh = mesh
+ points, face_idx = mesh.sample(sample_num, return_index=True)
+ points, _, _ = normalize_to_unit_cube(points, scale_factor=0.9995)
+ normals = mesh.face_normals[face_idx]
+
+ pc_normal = np.concatenate([points, normals], axis=-1, dtype=np.float16)
+ return pc_normal, return_mesh
+
+def process_single_file(args):
+ mesh_file, rig_file = args
+ mesh_name = os.path.basename(mesh_file).split('.')[0]
+ rig_name = os.path.basename(rig_file).split('.')[0]
+
+ if mesh_name != rig_name:
+ print(f"Skipping files {mesh_file} and {rig_file} because their names do not match.")
+ return None
+
+ vertices, faces, normals = read_obj_file(mesh_file)
+
+ joints, bones, root, joint_names, skinning_data = read_rig_file(rig_file)
+
+ # Normalize the mesh to the unit cube centered at the origin
+ vertices, normals, center, scale = normalize_to_unit_cube(vertices, normals, scale_factor=0.5)
+
+ # Apply the same transformation to joints
+ joints -= center
+ joints *= scale
+
+ # Create trimesh object for processing
+ mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
+
+ # Process into point cloud with normals
+ pc_normal, _ = process_mesh_to_pc(mesh)
+
+ # Convert skinning data to sparse format
+ sparse_skinning = convert_to_sparse_skinning(skinning_data, len(vertices), len(joints))
+
+ return {
+ 'vertices': vertices,
+ 'faces': faces,
+ 'normals': normals,
+ 'joints': joints,
+ 'bones': bones,
+ 'root_index': root,
+ 'uuid': mesh_name,
+ 'pc_w_norm': pc_normal,
+ 'joint_names': joint_names,
+ 'skinning_weights_value': sparse_skinning[0], # values
+ 'skinning_weights_row': sparse_skinning[1], # row indices (key name kept consistent with read_npz.py)
+ 'skinning_weights_col': sparse_skinning[2], # column indices
+ 'skinning_weights_shape': sparse_skinning[3] # shape of matrix
+ }
+
+def process_files(mesh_folder, rig_folder, output_file, num_workers=8):
+ file_pairs = []
+
+ for root, _, files in os.walk(rig_folder):
+ for file in files:
+ if file.endswith('.txt'):
+ rig_file = os.path.join(root, file)
+ obj_base_name = os.path.splitext(file)[0]
+ mesh_file = os.path.join(mesh_folder, obj_base_name + '.obj')
+ if os.path.exists(mesh_file):
+ file_pairs.append((mesh_file, rig_file))
+ else:
+ print(f"Mesh file not found: {mesh_file}")
+
+ with ProcessPoolExecutor(max_workers=num_workers) as executor:
+ data_list = list(executor.map(process_single_file, file_pairs))
+
+ data_list = [data for data in data_list if data is not None]
+
+ # np.savez_compressed pickles object arrays automatically; an allow_pickle kwarg would just be stored as an extra array
+ np.savez_compressed(output_file, data_list)
+
+def main():
+ # Example usage
+ mesh_folder = 'meshes/'
+ rig_folder = 'rigs/'
+ output_file = 'results.npz'
+
+ process_files(mesh_folder, rig_folder, output_file)
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/demo.py b/third_party/Puppeteer/skeleton/demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..7120d0bf77eb0169b9644a73bf741e63fc76dc3a
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/demo.py
@@ -0,0 +1,219 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import torch
+import trimesh
+import argparse
+import numpy as np
+
+from tqdm import tqdm
+from trimesh import Scene
+
+from accelerate import Accelerator
+from accelerate.utils import set_seed
+from accelerate.utils import DistributedDataParallelKwargs
+
+from skeleton_models.skeletongen import SkeletonGPT
+from data_utils.save_npz import normalize_to_unit_cube
+from utils.mesh_to_pc import MeshProcessor
+from utils.save_utils import save_mesh, pred_joints_and_bones, save_skeleton_to_txt, save_skeleton_to_txt_joint, save_args, \
+ merge_duplicate_joints_and_fix_bones, save_skeleton_obj, render_mesh_with_skeleton
+
+class Dataset:
+ def __init__(self, input_list, input_pc_num = 8192, apply_marching_cubes = True, octree_depth = 7, output_dir = None):
+ super().__init__()
+ self.data = []
+ self.output_dir = output_dir
+
+ mesh_list = []
+ for input_path in input_list:
+ ext = os.path.splitext(input_path)[1].lower()
+ if ext in ['.ply', '.stl', '.obj']:
+ cur_data = trimesh.load(input_path, force='mesh')
+ mesh_list.append(cur_data)
+ else:
+ print(f"Unsupported file type: {ext}")
+ if apply_marching_cubes:
+ print("First apply Marching Cubes and then sample point cloud, need time...")
+ pc_list = MeshProcessor.convert_meshes_to_point_clouds(mesh_list, input_pc_num, apply_marching_cubes = apply_marching_cubes, octree_depth = octree_depth)
+ for input_path, cur_data, mesh in zip(input_list, pc_list, mesh_list):
+ self.data.append({'pc_normal': cur_data, 'faces': mesh.faces, 'vertices': mesh.vertices, 'file_name': os.path.splitext(os.path.basename(input_path))[0]})
+ print(f"dataset total data samples: {len(self.data)}")
+
+ def __len__(self):
+ return len(self.data)
+
+ def __getitem__(self, idx):
+ data_dict = {}
+ data_dict['pc_normal'] = self.data[idx]['pc_normal']
+ # normalize pc coor
+ pc_coor = data_dict['pc_normal'][:, :3]
+ normals = data_dict['pc_normal'][:, 3:]
+ pc_coor, center, scale = normalize_to_unit_cube(pc_coor, scale_factor=0.9995)
+
+ data_dict['file_name'] = self.data[idx]['file_name']
+ pc_coor = pc_coor.astype(np.float32)
+ normals = normals.astype(np.float32)
+
+ point_cloud = trimesh.PointCloud(pc_coor)
+ point_cloud.metadata['normals'] = normals
+
+ try:
+ point_cloud.export(os.path.join(self.output_dir, f"{data_dict['file_name']}.ply"))
+ except Exception as e:
+ print(f"fail to save point clouds: {e}")
+
+ assert (np.linalg.norm(normals, axis=-1) > 0.99).all(), "normals should be unit vectors, something wrong"
+ data_dict['pc_normal'] = np.concatenate([pc_coor, normals], axis=-1, dtype=np.float16)
+
+ vertices = self.data[idx]['vertices']
+ faces = self.data[idx]['faces']
+ bounds = np.array([pc_coor.min(axis=0), pc_coor.max(axis=0)])
+ pc_center = (bounds[0] + bounds[1])[None, :] / 2
+ pc_scale = ((bounds[1] - bounds[0]).max() + 1e-5)
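+ # transform_params stores both normalization steps (unit-cube center/scale of the sampled points and
+ # their bounding-box re-centering pc_center/pc_scale) so predictions can be mapped back later.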
+ data_dict['transform_params'] = torch.tensor([
+ center[0], center[1], center[2],
+ scale,
+ pc_center[0][0], pc_center[0][1], pc_center[0][2],
+ pc_scale
+ ], dtype=torch.float32)
+ data_dict['vertices'] = vertices
+ data_dict['faces']= faces
+ return data_dict
+
+def get_args():
+ parser = argparse.ArgumentParser("SkeletonGPT", add_help=False)
+
+ parser.add_argument("--input_pc_num", default=8192, type=int)
+ parser.add_argument("--num_beams", default=1, type=int)
+ parser.add_argument('--input_dir', default=None, type=str, help="input mesh directory")
+ parser.add_argument('--input_path', default=None, type=str, help="input mesh path")
+ parser.add_argument("--output_dir", default="outputs", type=str)
+ parser.add_argument('--llm', default="facebook/opt-350m", type=str, help="The LLM backend")
+ parser.add_argument("--pad_id", default=-1, type=int, help="padding id")
+ parser.add_argument("--n_discrete_size", default=128, type=int, help="discretized 3D space")
+ parser.add_argument("--n_max_bones", default=100, type=int, help="max number of bones")
+ parser.add_argument('--dataset_path', default="combine_256_updated", type=str, help="data path")
+ parser.add_argument("--seed", default=0, type=int)
+ parser.add_argument("--precision", default="fp16", type=str)
+ parser.add_argument("--batchsize_per_gpu", default=1, type=int)
+ parser.add_argument('--pretrained_weights', default=None, type=str)
+ parser.add_argument('--save_name', default="infer_results", type=str)
+ parser.add_argument("--save_render", default=False, action="store_true", help="save rendering results of mesh with skel")
+ parser.add_argument("--apply_marching_cubes", default=False, action="store_true")
+ parser.add_argument("--octree_depth", default=7, type=int)
+ parser.add_argument("--hier_order", default=False, action="store_true")
+ parser.add_argument("--joint_token", default=False, action="store_true", help="use joint_based tokenization")
+ parser.add_argument("--seq_shuffle", default=False, action="store_true", help="shuffle the skeleton sequence")
+
+ args = parser.parse_args()
+ return args
+
+if __name__ == "__main__":
+ args = get_args()
+
+ output_dir = f'{args.output_dir}/{args.save_name}'
+ os.makedirs(output_dir, exist_ok=True)
+ save_args(args, output_dir)
+
+ kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
+ accelerator = Accelerator(
+ kwargs_handlers=[kwargs],
+ mixed_precision=args.precision,
+ )
+
+ model = SkeletonGPT(args).cuda()
+
+ if args.pretrained_weights is not None:
+ pkg = torch.load(args.pretrained_weights, map_location=torch.device("cpu"))
+ model.load_state_dict(pkg["model"])
+ else:
+ raise ValueError("Pretrained weights must be provided.")
+ model.eval()
+ set_seed(args.seed)
+
+ # create dataset
+ if args.input_dir is not None:
+ input_list = sorted(os.listdir(args.input_dir))
+ input_list = [os.path.join(args.input_dir, x) for x in input_list if x.endswith('.ply') or x.endswith('.obj') or x.endswith('.stl')]
+ dataset = Dataset(input_list, args.input_pc_num, args.apply_marching_cubes, args.octree_depth, output_dir)
+ elif args.input_path is not None:
+ dataset = Dataset([args.input_path], args.input_pc_num, args.apply_marching_cubes, args.octree_depth, output_dir)
+ else:
+ raise ValueError("input_dir or input_path must be provided.")
+
+ dataloader = torch.utils.data.DataLoader(
+ dataset,
+ batch_size= 1,
+ drop_last = False,
+ shuffle = False,
+ )
+
+ dataloader, model = accelerator.prepare(dataloader, model)
+
+ for curr_iter, batch_data_label in tqdm(enumerate(dataloader), total=len(dataloader)):
+ with accelerator.autocast():
+ pred_bone_coords = model.generate(batch_data_label)
+
+ # determine the output file name
+ file_name = os.path.basename(batch_data_label['file_name'][0])
+ pred_skel_filename = os.path.join(output_dir, f'{file_name}_skel.obj')
+ pred_rig_filename = os.path.join(output_dir, f"{file_name}_pred.txt")
+ mesh_filename = os.path.join(output_dir, f"{file_name}_mesh.obj")
+
+ transform_params = batch_data_label['transform_params'][0].cpu().numpy()
+ trans = transform_params[:3]
+ scale = transform_params[3]
+ pc_trans = transform_params[4:7]
+ pc_scale = transform_params[7]
+ vertices = batch_data_label['vertices'][0].cpu().numpy()
+ faces = batch_data_label['faces'][0].cpu().numpy()
+
+ skeleton = pred_bone_coords[0].cpu().numpy()
+ pred_joints, pred_bones = pred_joints_and_bones(skeleton.squeeze())
+
+ # Post process: merge duplicate or nearby joints and deduplicate bones.
+ if args.hier_order: # for MagicArticulate hier order
+ pred_root_index = pred_bones[0][0]
+ pred_joints, pred_bones, pred_root_index = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones, root_index=pred_root_index)
+ else: # for Puppeteer or MagicArticulate spatial order
+ pred_joints, pred_bones = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones)
+ pred_root_index = None
+
+ # When saving the rig to .txt, denormalize the skeleton back to the scale of the input mesh
+ pred_joints_denorm = pred_joints * pc_scale + pc_trans # first align with point cloud
+ pred_joints_denorm = pred_joints_denorm / scale + trans # then align with original mesh
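+ # This inverts the forward mapping applied to the mesh below: p_norm = ((p - trans) * scale - pc_trans) / pc_scale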
+
+ if args.joint_token:
+ pred_root_index = save_skeleton_to_txt_joint(pred_joints_denorm, pred_bones, pred_rig_filename)
+ else:
+ save_skeleton_to_txt(pred_joints_denorm, pred_bones, pred_root_index, args.hier_order, vertices, pred_rig_filename)
+
+ # save skeletons
+ if args.hier_order or args.joint_token:
+ save_skeleton_obj(pred_joints, pred_bones, pred_skel_filename, pred_root_index, use_cone=True)
+ else:
+ save_skeleton_obj(pred_joints, pred_bones, pred_skel_filename, use_cone=False)
+
+ # when saving mesh and rendering, use normalized vertices (-0.5,0.5)
+ vertices_norm = (vertices - trans) * scale
+ vertices_norm = (vertices_norm - pc_trans) / pc_scale
+ save_mesh(vertices_norm, faces, mesh_filename)
+
+ # render mesh w/ skeleton
+ if args.save_render:
+ if args.hier_order or args.joint_token:
+ render_mesh_with_skeleton(pred_joints, pred_bones, vertices_norm, faces, output_dir, file_name, prefix='pred', root_idx=pred_root_index)
+ else:
+ render_mesh_with_skeleton(pred_joints, pred_bones, vertices_norm, faces, output_dir, file_name, prefix='pred')
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/demo.sh b/third_party/Puppeteer/skeleton/demo.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6029ed543f3d9ee88befb5e7c1fd7b6b13d416e6
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/demo.sh
@@ -0,0 +1,19 @@
+CUDA_VISIBLE_DEVICES=0 python demo.py --input_dir ./examples \
+ --pretrained_weights skeleton_ckpts/puppeteer_skeleton_w_diverse_pose.pth \
+ --save_name infer_results_demo --input_pc_num 8192 \
+ --save_render --apply_marching_cubes --joint_token --seq_shuffle
+
+# If the results are not satisfactory, try the model trained with bone-based tokenization:
+
+# CUDA_VISIBLE_DEVICES=0 python demo.py --input_dir ./examples \
+# --pretrained_weights skeleton_ckpts/puppeteer_skeleton_w_diverse_pose_bone_token.pth \
+# --save_name infer_results_demo_bone_token --input_pc_num 8192 \
+# --save_render --apply_marching_cubes --hier_order --seq_shuffle
+
+
+# If you want to run the demo using MagicArticulate weights, run:
+
+# CUDA_VISIBLE_DEVICES=0 python demo.py --input_dir ./examples \
+# --pretrained_weights skeleton_ckpts/checkpoint_trainonv2_hier.pth \
+# --save_name infer_results_demo_magicarti --input_pc_num 8192 \
+# --save_render --apply_marching_cubes --hier_order
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/download.py b/third_party/Puppeteer/skeleton/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..be8eda31b6867649cc829b795a584a6371ac5f09
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/download.py
@@ -0,0 +1,25 @@
+from huggingface_hub import hf_hub_download
+
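+# Fetch the Michelangelo shape VAE checkpoint and the Puppeteer skeleton checkpoints from the Hugging Face Hub.
+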
+file_path = hf_hub_download(
+ repo_id="Maikou/Michelangelo",
+ filename="checkpoints/aligned_shape_latents/shapevae-256.ckpt",
+ local_dir="third_partys/Michelangelo"
+)
+
+file_path = hf_hub_download(
+ repo_id="Seed3D/Puppeteer",
+ filename="skeleton_ckpts/puppeteer_skeleton_w_diverse_pose.pth",
+ local_dir="skeleton"
+)
+
+file_path = hf_hub_download(
+ repo_id="Seed3D/Puppeteer",
+ filename="skeleton_ckpts/puppeteer_skeleton_wo_diverse_pose.pth",
+ local_dir="skeleton"
+)
+
+file_path = hf_hub_download(
+ repo_id="Seed3D/Puppeteer",
+ filename="skeleton_ckpts/puppeteer_skeleton_w_diverse_pose_bone_token.pth",
+ local_dir="skeleton"
+)
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/eval.sh b/third_party/Puppeteer/skeleton/eval.sh
new file mode 100644
index 0000000000000000000000000000000000000000..dce8ffeb48eadb5ab987b174451ec5ded541bfbf
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/eval.sh
@@ -0,0 +1,15 @@
+CUDA_VISIBLE_DEVICES=0 python evaluate.py --dataset_path articulation_xlv2_test.npz \
+ --pretrained_weights skeleton_ckpts/puppeteer_skeleton_w_diverse_pose.pth \
+ --save_name infer_results_xl --input_pc_num 8192 \
+ --save_render --joint_token --seq_shuffle
+
+# Evaluating on the xl2.0 test set takes a while, as it contains 2000 samples for inference.
+# Change dataset_path and save_name when evaluating on other test sets.
+
+
+# If you also want to evaluate MagicArticulate, run:
+
+# CUDA_VISIBLE_DEVICES=0 python evaluate.py --dataset_path articulation_xlv2_test.npz \
+# --pretrained_weights skeleton_ckpts/checkpoint_trainonv2_hier.pth \
+# --save_name infer_results_xl_magicarti --input_pc_num 8192 \
+# --save_render --hier_order
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/evaluate.py b/third_party/Puppeteer/skeleton/evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..a170aecf3d7ff809a2bf8a268fb032d9d437a867
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/evaluate.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import math
+import time
+import torch
+import argparse
+import numpy as np
+
+from tqdm import tqdm
+from accelerate import Accelerator
+from accelerate.utils import set_seed
+from accelerate.utils import DistributedDataParallelKwargs
+
+from skeleton_models.skeletongen import SkeletonGPT
+from utils.skeleton_data_loader import SkeletonData
+from utils.save_utils import save_mesh, pred_joints_and_bones, save_skeleton_to_txt, save_skeleton_to_txt_joint, save_args, \
+ merge_duplicate_joints_and_fix_bones, save_skeleton_obj, render_mesh_with_skeleton
+from utils.eval_utils import chamfer_dist, joint2bone_chamfer_dist, bone2bone_chamfer_dist
+
+
+def get_args():
+ parser = argparse.ArgumentParser("SkeletonGPT", add_help=False)
+
+ parser.add_argument("--input_pc_num", default=8192, type=int)
+ parser.add_argument("--num_beams", default=1, type=int)
+ parser.add_argument('--llm', default="facebook/opt-350m", type=str, help="The LLM backend")
+ parser.add_argument("--pad_id", default=-1, type=int, help="padding id")
+ parser.add_argument("--n_discrete_size", default=128, type=int, help="size of discretized 3D space")
+ parser.add_argument("--n_max_bones", default=100, type=int, help="max number of bones")
+ parser.add_argument('--dataset_path', default="Articulation_xlv2.npz", type=str, help="data path")
+ parser.add_argument("--output_dir", default="outputs", type=str)
+ parser.add_argument('--save_name', default="infer_results", type=str)
+ parser.add_argument("--save_render", default=False, action="store_true", help="save rendering results of mesh with skel")
+ parser.add_argument("--seed", default=0, type=int)
+ parser.add_argument("--precision", default="fp16", type=str)
+ parser.add_argument("--batchsize_per_gpu", default=1, type=int)
+ parser.add_argument('--pretrained_weights', default=None, type=str, help="path of pretrained models")
+ parser.add_argument("--hier_order", default=False, action="store_true", help="use hier order")
+ parser.add_argument("--joint_token", default=False, action="store_true", help="use joint_based tokenization")
+ parser.add_argument("--seq_shuffle", default=False, action="store_true", help="shuffle the skeleton sequence")
+
+ args = parser.parse_args()
+ return args
+
+if __name__ == "__main__":
+ args = get_args()
+
+ dataset = SkeletonData.load(args, is_training=False)
+
+ dataloader = torch.utils.data.DataLoader(
+ dataset,
+ batch_size=1,
+ drop_last = False,
+ shuffle = False,
+ )
+
+ kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
+ accelerator = Accelerator(
+ kwargs_handlers=[kwargs],
+ mixed_precision=args.precision,
+ )
+
+ model = SkeletonGPT(args).cuda()
+
+ if args.pretrained_weights is not None:
+ pkg = torch.load(args.pretrained_weights, map_location=torch.device("cpu"))
+ model.load_state_dict(pkg["model"])
+ else:
+ raise ValueError("Pretrained weights must be provided.")
+
+ set_seed(args.seed)
+ dataloader, model = accelerator.prepare(
+ dataloader,
+ model,
+ )
+
+ model.eval()
+
+ output_dir = f'{args.output_dir}/{args.save_name}'
+ print(output_dir)
+ os.makedirs(output_dir, exist_ok=True)
+ save_args(args, output_dir)
+
+ gt_samples, pred_samples = [], []
+ avg_j2j_cd, avg_j2b_cd, avg_b2b_cd = 0.0, 0.0, 0.0
+ infer_all_time = []
+ num_valid = 0
+ results_file = f'{output_dir}/evaluate_results.txt'
+
+ for curr_iter, batch_data_label in tqdm(enumerate(dataloader), total=len(dataloader)):
+ start_time = time.time()
+ with accelerator.autocast():
+ pred_bone_coords = model.generate(batch_data_label)
+ infer_time_per_mesh = time.time() - start_time
+ infer_all_time.append(infer_time_per_mesh)
+
+ if pred_bone_coords is None:
+ continue
+ print(pred_bone_coords.shape)
+
+ if pred_bone_coords.shape[1] > 0:
+ gt_joints = batch_data_label['joints'].squeeze(0).cpu().numpy()
+ gt_bones = batch_data_label['bones'].squeeze(0).cpu().numpy()
+
+ pred_joints, pred_bones = pred_joints_and_bones(pred_bone_coords.cpu().numpy().squeeze(0))
+ if pred_bones.shape[0] == 0:
+ continue
+
+ # Post process: merge duplicate or nearby joints and deduplicate bones.
+ if args.hier_order: # for MagicArticulate hier order
+ pred_root_index = pred_bones[0][0]
+ pred_joints, pred_bones, pred_root_index = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones, root_index=pred_root_index)
+ else: # for Puppeteer or MagicArticulate spatial order
+ pred_joints, pred_bones = merge_duplicate_joints_and_fix_bones(pred_joints, pred_bones)
+ pred_root_index = None
+
+ gt_root_index = int(batch_data_label['root_index'][0])
+ gt_joints, gt_bones, gt_root_index = merge_duplicate_joints_and_fix_bones(gt_joints, gt_bones, root_index=gt_root_index) # also merge duplicate joints/bones for GT to prevent NaNs in CD computation.
+
+ if gt_bones.shape[0] == 0 or pred_bones.shape[0] == 0:
+ continue
+
+ ### calculate CD
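+ # J2J / J2B / B2B: joint-to-joint, joint-to-bone and bone-to-bone Chamfer distances between prediction and GT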
+ j2j_cd = chamfer_dist(pred_joints, gt_joints)
+ j2b_cd = joint2bone_chamfer_dist(pred_joints, pred_bones, gt_joints, gt_bones)
+ b2b_cd = bone2bone_chamfer_dist(pred_joints, pred_bones, gt_joints, gt_bones)
+
+ if math.isnan(j2j_cd) or math.isnan(j2b_cd) or math.isnan(b2b_cd):
+ print("NaN cd")
+ else:
+ num_valid += 1
+ avg_j2j_cd += j2j_cd
+ avg_j2b_cd += j2b_cd
+ avg_b2b_cd += b2b_cd
+ print(f"For {batch_data_label['uuid'][0]}, J2J Chamfer Distance: {j2j_cd:.7f}, J2B Chamfer Distance: {j2b_cd:.7f}, B2B Chamfer Distance: {b2b_cd:.7f}, infer time: {infer_time_pre_mesh:.7f}")
+ with open(results_file, 'a') as f:
+ f.write(f"For {batch_data_label['uuid'][0]}, J2J Chamfer Distance: {j2j_cd:.7f}, J2B Chamfer Distance: {j2b_cd:.7f}, B2B Chamfer Distance: {b2b_cd:.7f}, infer time: {infer_time_pre_mesh:.7f}\n")
+
+ if len(gt_samples) <= 30: # only save the first ~30 results; raise this limit (e.g., to 2000) to save all
+ pred_samples.append((pred_joints, pred_bones, pred_root_index))
+ gt_samples.append((gt_joints, gt_bones, batch_data_label['vertices'][0], batch_data_label['faces'][0], batch_data_label['transform_params'][0], batch_data_label['uuid'][0], gt_root_index))
+
+ with open(results_file, 'a') as f:
+ f.write(f"Average J2J Chamfer Distance: {avg_j2j_cd/num_valid:.7f}\n")
+ f.write(f"Average J2B Chamfer Distance: {avg_j2b_cd/num_valid:.7f}\n")
+ f.write(f"Average B2B Chamfer Distance: {avg_b2b_cd/num_valid:.7f}\n")
+ f.write(f"Average inference time: {np.mean(infer_all_time):.7f}\n")
+ print(f"Valid generation: {num_valid}, Average J2J Chamfer Distance: {avg_j2j_cd/num_valid:.7f}, average J2B Chamfer Distance: {avg_j2b_cd/num_valid:.7f}, average B2B Chamfer Distance: {avg_b2b_cd/num_valid:.7f}, average infer time: {np.mean(infer_all_time):.7f}")
+
+ # save results
+ for i, ((pred_joints, pred_bones, pred_root_index), (gt_joints, gt_bones, vertices, faces, transform_params, file_name, gt_root_index)) in enumerate(zip(pred_samples, gt_samples)):
+ pred_skel_filename = f'{output_dir}/{file_name}_skel_pred.obj'
+ gt_skel_filename = f'{output_dir}/{file_name}_skel_gt.obj'
+ mesh_filename = f'{output_dir}/{file_name}.obj'
+ pred_rig_filename = f'{output_dir}/{file_name}_pred.txt'
+
+ vertices = vertices.cpu().numpy()
+ faces = faces.cpu().numpy()
+ trans = transform_params[:3].cpu().numpy()
+ scale = transform_params[3].cpu().numpy()
+ pc_trans = transform_params[4:7].cpu().numpy()
+ pc_scale = transform_params[7].cpu().numpy()
+
+ # save skeleton to .txt, denormalize the skeletons to align with input meshes
+ pred_joints_denorm = pred_joints * pc_scale + pc_trans # first align with point cloud
+ pred_joints_denorm = pred_joints_denorm / scale + trans # then align with original mesh
+
+ if args.joint_token:
+ pred_root_index = save_skeleton_to_txt_joint(pred_joints_denorm, pred_bones, pred_rig_filename)
+ else:
+ save_skeleton_to_txt(pred_joints_denorm, pred_bones, pred_root_index, args.hier_order, vertices, pred_rig_filename)
+
+ # save skeletons
+ if args.hier_order or args.joint_token:
+ save_skeleton_obj(pred_joints, pred_bones, pred_skel_filename, pred_root_index, use_cone=True)
+ else:
+ save_skeleton_obj(pred_joints, pred_bones, pred_skel_filename, use_cone=False)
+ save_skeleton_obj(gt_joints, gt_bones, gt_skel_filename, gt_root_index, use_cone=True)
+
+ # save mesh
+ # when saving mesh and rendering, use normalized vertices (-0.5,0.5)
+ vertices_norm = (vertices - trans) * scale
+ vertices_norm = (vertices_norm - pc_trans) / pc_scale
+ save_mesh(vertices_norm, faces, mesh_filename)
+
+ # render mesh w/ skeleton
+ if args.save_render:
+ if args.hier_order or args.joint_token:
+ render_mesh_with_skeleton(pred_joints, pred_bones, vertices_norm, faces, output_dir, file_name, prefix='pred', root_idx=pred_root_index)
+ else:
+ render_mesh_with_skeleton(pred_joints, pred_bones, vertices_norm, faces, output_dir, file_name, prefix='pred')
+ render_mesh_with_skeleton(gt_joints, gt_bones, vertices_norm, faces, output_dir, file_name, prefix='gt', root_idx=gt_root_index)
+
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/skeleton_models/.DS_Store b/third_party/Puppeteer/skeleton/skeleton_models/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/third_party/Puppeteer/skeleton/skeleton_models/.DS_Store differ
diff --git a/third_party/Puppeteer/skeleton/skeleton_models/skeleton_opt.py b/third_party/Puppeteer/skeleton/skeleton_models/skeleton_opt.py
new file mode 100644
index 0000000000000000000000000000000000000000..722b165b04af4e0d1c1899af4d7424a8917df120
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/skeleton_models/skeleton_opt.py
@@ -0,0 +1,429 @@
+# Copyright (c) 2023 S-Lab
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: S-Lab License 1.0
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under S-Lab License 1.0, with the full license text
+# available at https://github.com/buaacyw/MeshAnything/blob/main/LICENSE.txt.
+#
+# This modified file is released under the same license.
+
+from transformers import AutoModelForCausalLM, AutoConfig, OPTConfig
+from transformers.models.opt.modeling_opt import OPTForCausalLM, OPTModel, OPTDecoder, OPTLearnedPositionalEmbedding, OPTDecoderLayer
+from typing import List, Optional, Tuple, Union
+from transformers.modeling_outputs import (
+ CausalLMOutputWithPast,
+)
+import torch
+from torch import nn
+from torch.nn import CrossEntropyLoss
+from transformers.utils import replace_return_docstrings
+from transformers.modeling_outputs import BaseModelOutputWithPast
+
+class SkeletonOPTConfig(OPTConfig):
+ model_type = "skeleton_opt"
+
+class SkeletonOPT(OPTForCausalLM):
+ config_class = SkeletonOPTConfig
+ def __init__(self, config: SkeletonOPTConfig):
+ super(OPTForCausalLM, self).__init__(config)
+ self.model = SkeletonOPTModel(config)
+ self.lm_head = nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class="OPTConfig")
+ def forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ bone_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ target_pos_embed: Optional[torch.Tensor] = None,
+ head_mask: Optional[torch.Tensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ labels: Optional[torch.LongTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, CausalLMOutputWithPast]:
+ r"""
+ Args:
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
+ provide it.
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ [What are input IDs?](../glossary#input-ids)
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+ head_mask (`torch.Tensor` of shape `(num_hidden_layers, num_attention_heads)`, *optional*):
+ Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
+
+ - 1 indicates the head is **not masked**,
+ - 0 indicates the head is **masked**.
+
+ past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+ Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
+ shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of
+ shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. The two additional
+ tensors are only required when the model is used as a decoder in a Sequence to Sequence model.
+
+ Contains pre-computed hidden-states (key and values in the self-attention blocks and in the
+ cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
+
+ If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
+ that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
+ all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
+ This is useful if you want more control over how to convert `input_ids` indices into associated vectors
+ than the model's internal embedding lookup matrix.
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+ config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
+ (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+ use_cache (`bool`, *optional*):
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
+ (see `past_key_values`).
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+ for more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+
+ Returns:
+
+ Example:
+
+ ```python
+ >>> from transformers import AutoTokenizer, OPTForCausalLM
+
+ >>> model = OPTForCausalLM.from_pretrained("facebook/opt-350m")
+ >>> tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+
+ >>> prompt = "Hey, are you conscious? Can you talk to me?"
+ >>> inputs = tokenizer(prompt, return_tensors="pt")
+
+ >>> # Generate
+ >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
+ >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+ "Hey, are you conscious? Can you talk to me?\nI'm not conscious. I'm just a little bit of a weirdo."
+ ```"""
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
+ outputs = self.model.decoder(
+ input_ids = input_ids,
+ bone_ids = bone_ids,
+ attention_mask=attention_mask,
+ target_pos_embed=target_pos_embed,
+ head_mask=head_mask,
+ past_key_values=past_key_values,
+ inputs_embeds=inputs_embeds,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+
+ logits = self.lm_head(outputs[0]).contiguous()
+
+ loss = None
+ if labels is not None:
+ # move labels to correct device to enable model parallelism
+ labels = labels.to(logits.device)
+ # Shift so that tokens < n predict n
+ shift_logits = logits[..., :-1, :].contiguous()
+ shift_labels = labels[..., 1:].contiguous()
+ # Flatten the tokens
+ loss_fct = CrossEntropyLoss()
+ loss = loss_fct(shift_logits.view(-1, self.config.vocab_size), shift_labels.view(-1))
+
+ if not return_dict:
+ output = (logits,) + outputs[1:]
+ return (loss,) + output if loss is not None else output
+
+ return CausalLMOutputWithPast(
+ loss=loss,
+ logits=logits,
+ past_key_values=outputs.past_key_values,
+ hidden_states=outputs.hidden_states,
+ attentions=outputs.attentions,
+ )
+
+class SkeletonOPTModel(OPTModel):
+ config_class = SkeletonOPTConfig
+ def __init__(self, config: SkeletonOPTConfig):
+        super(OPTModel, self).__init__(config)
+ self.decoder = SkeletonOPTDecoder(config)
+ # Initialize weights and apply final processing
+ self.post_init()
+
+class SkeletonOPTDecoder(OPTDecoder):
+ config_class = SkeletonOPTConfig
+ def __init__(self, config: SkeletonOPTConfig):
+        super(OPTDecoder, self).__init__(config)
+ self.config = config
+ self.dropout = config.dropout
+ self.layerdrop = config.layerdrop
+ self.padding_idx = config.pad_token_id
+ self.vocab_size = config.vocab_size
+ assert config.word_embed_proj_dim == config.hidden_size
+ self.embed_tokens = nn.Embedding(config.vocab_size, config.word_embed_proj_dim, self.padding_idx)
+ self.hidden_size = config.hidden_size
+ self.word_embed_proj_dim = config.word_embed_proj_dim
+ self.n_discrete_size = config.n_discrete_size
+ self.joint_token = config.joint_token
+
+ self.embed_positions = OPTLearnedPositionalEmbedding(config.max_position_embeddings, config.hidden_size)
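+        # bone_per_token + 3 embedding slots: ids 0-2 are reserved for the bos/eos/pad tokens, the remaining slots encode the position within a bone.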
+ self.token_embed_positions = OPTBonePositionalEmbedding(config.bone_per_token+3, config.word_embed_proj_dim)
+
+ self.bone_per_token = config.bone_per_token
+ self.cond_length = config.cond_length
+ self.cond_embed = nn.Embedding(2, config.word_embed_proj_dim)
+ # Note that the only purpose of `config._remove_final_layer_norm` is to keep backward compatibility
+ # with checkpoints that have been fine-tuned before transformers v4.20.1
+ # see https://github.com/facebookresearch/metaseq/pull/164
+ if config.do_layer_norm_before and not config._remove_final_layer_norm:
+ self.final_layer_norm = nn.LayerNorm(
+ config.hidden_size, elementwise_affine=config.layer_norm_elementwise_affine
+ )
+ else:
+ self.final_layer_norm = None
+
+ self.layers = nn.ModuleList([OPTDecoderLayer(config) for _ in range(config.num_hidden_layers)])
+ self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
+
+ self.gradient_checkpointing = False
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ bone_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ target_pos_embed: Optional[torch.Tensor] = None,
+ head_mask: Optional[torch.Tensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, BaseModelOutputWithPast]:
+ r"""
+ Args:
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
+ provide it.
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ [What are input IDs?](../glossary#input-ids)
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+ head_mask (`torch.Tensor` of shape `(num_hidden_layers, num_attention_heads)`, *optional*):
+ Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:
+
+ - 1 indicates the head is **not masked**,
+ - 0 indicates the head is **masked**.
+
+ past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+            Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
+            shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`.
+
+ Contains pre-computed hidden-states (key and values in the self-attention blocks and in the
+ cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
+
+ If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
+ that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
+ all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
+
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
+ This is useful if you want more control over how to convert `input_ids` indices into associated vectors
+ than the model's internal embedding lookup matrix.
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+ returned tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
+ for more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+ """
+ # OPT Decoder
+ # print("used my Trans")
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+ # Transformer Decoder
+ if input_ids is not None and inputs_embeds is not None: # when training
+ pass
+ elif input_ids is not None: # when inference
+ assert not self.training
+ input_shape = input_ids.size()
+ input_ids = input_ids.view(-1, input_shape[-1])
+ inputs_embeds = self.embed_tokens(input_ids)
+ if target_pos_embed is not None:
+ if past_key_values is not None:
+ past_length = past_key_values[0][0].shape[2]
+ current_position = past_length - self.cond_length - 1
+
+ if current_position >= 0 and current_position < target_pos_embed.shape[1]:
+ token_position = 0 if input_ids.shape[1] == 1 else -1
+ inputs_embeds[:, token_position:token_position+1] += target_pos_embed[:, current_position:current_position+1]
+
+ bone_embeds = self.token_embed_positions(attention_mask[:, self.cond_length:], bone_ids, input_ids,
+ self.bone_per_token)
+ inputs_embeds += bone_embeds
+ cond_embed_query = torch.ones((inputs_embeds.shape[0], inputs_embeds.shape[1]), device=inputs_embeds.device,
+ dtype=inputs_embeds.dtype).long()
+ inputs_embeds = inputs_embeds + self.cond_embed(cond_embed_query)
+
+ elif inputs_embeds is not None: # when generate first skeleton token
+ assert not self.training
+ total_length = inputs_embeds.shape[1]
+ cond_embed_query = torch.zeros((inputs_embeds.shape[0], total_length), device=inputs_embeds.device,
+ dtype=inputs_embeds.dtype).long()
+ inputs_embeds = inputs_embeds + self.cond_embed(cond_embed_query)
+ else:
+ raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
+
+ past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
+ # embed positions
+ if self._use_flash_attention_2:
+ # 2d mask is passed through the layers
+ assert attention_mask is not None
+ causal_attention_mask = attention_mask if 0 in attention_mask else None
+ else:
+ raise ValueError("Only flash_attention_2 is supported")
+
+ pos_embeds = self.embed_positions(attention_mask, past_key_values_length)
+
+ hidden_states = inputs_embeds + pos_embeds
+
+ # decoder layers
+ all_hidden_states = () if output_hidden_states else None
+ all_self_attns = () if output_attentions else None
+ next_decoder_cache = () if use_cache else None
+
+ # check if head_mask has a correct number of layers specified if desired
+ for attn_mask, mask_name in zip([head_mask], ["head_mask"]):
+ if attn_mask is not None:
+ if attn_mask.size()[0] != (len(self.layers)):
+ raise ValueError(
+ f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for"
+ f" {head_mask.size()[0]}."
+ )
+
+ for idx, decoder_layer in enumerate(self.layers):
+ # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ if self.training:
+ dropout_probability = torch.rand([])
+ if dropout_probability < self.layerdrop:
+ continue
+
+ past_key_value = past_key_values[idx] if past_key_values is not None else None
+
+ if self.gradient_checkpointing and self.training:
+ layer_outputs = self._gradient_checkpointing_func(
+ decoder_layer.__call__,
+ hidden_states,
+ causal_attention_mask,
+ head_mask[idx] if head_mask is not None else None,
+ None,
+ output_attentions,
+ use_cache,
+ )
+ else:
+ layer_outputs = decoder_layer(
+ hidden_states,
+ attention_mask=causal_attention_mask,
+ layer_head_mask=(head_mask[idx] if head_mask is not None else None),
+ past_key_value=past_key_value,
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ if use_cache:
+ next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
+
+ if output_attentions:
+ all_self_attns += (layer_outputs[1],)
+
+ if self.final_layer_norm is not None:
+ hidden_states = self.final_layer_norm(hidden_states)
+
+ # add hidden states from the last decoder layer
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ next_cache = next_decoder_cache if use_cache else None
+ if not return_dict:
+ return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
+ return BaseModelOutputWithPast(
+ last_hidden_state=hidden_states,
+ past_key_values=next_cache,
+ hidden_states=all_hidden_states,
+ attentions=all_self_attns,
+ )
+
+class OPTBonePositionalEmbedding(nn.Embedding):
+ """
+ This module learns positional embeddings up to a fixed maximum size.
+ """
+
+ def __init__(self, num_embeddings: int, embedding_dim: int):
+ super().__init__(num_embeddings, embedding_dim)
+
+    def forward(self, attention_mask=None, bone_ids=None, input_ids=None, bone_per_token=None):
+        """Return per-token intra-bone positional embeddings for a `[bsz, seqlen]` batch of token ids."""
+        if bone_ids is not None:
+            return super().forward(bone_ids)
+
+        # Incremental decoding: exactly one new token is embedded per step.
+        assert input_ids.shape[1] == 1
+        # Special tokens (0: bos, 1: eos, 2: pad) keep their own embedding slots.
+        idx_in_extra = torch.isin(input_ids, torch.LongTensor([0, 1, 2]).to(input_ids.device))
+        cur_ids = input_ids.clone().detach()
+
+        # Non-special tokens use slots 3..bone_per_token+2, cycling with the position inside the current bone.
+        cur_index = (attention_mask.sum(dim=1, keepdim=True) - 2) % bone_per_token + 3
+        cur_ids[~idx_in_extra] = cur_index[~idx_in_extra]
+
+        return super().forward(cur_ids)
+
+AutoConfig.register("skeleton_opt", SkeletonOPTConfig)
+AutoModelForCausalLM.register(SkeletonOPTConfig, SkeletonOPT)
+
diff --git a/third_party/Puppeteer/skeleton/skeleton_models/skeletongen.py b/third_party/Puppeteer/skeleton/skeleton_models/skeletongen.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0601ccc91673875d3339c8d91c2e283bdd1a37a
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/skeleton_models/skeletongen.py
@@ -0,0 +1,300 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from torch import nn
+from transformers import AutoModelForCausalLM
+from third_partys.Michelangelo.encode import load_model
+from skeleton_models.skeleton_opt import SkeletonOPTConfig
+
+def undiscretize(t, low, high, num_discrete):
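+    # Map discrete bin indices in [0, num_discrete-1] back to continuous values in [low, high).
+    # For example, assuming num_discrete=128 and range (-0.5, 0.5): bin 0 -> -0.5, bin 127 -> ~0.492.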
+ assert (t >= 0).all() and (t <= num_discrete-1).all()
+ assert high > low
+ t = t.float()
+ t /= num_discrete
+ t = t * (high - low) + low
+ assert (t < high).all() and (t >= low).all()
+ return t
+
+class SkeletonGPT(nn.Module):
+ def __init__(self, args):
+ super().__init__()
+
+ self.args = args
+ self.point_encoder = load_model()
+
+ self.cond_length = 257
+ self.cond_dim = 768
+ self.joint_token = args.joint_token
+
+ self.n_discrete_size = args.n_discrete_size
+
+ if self.joint_token:
+            self.bone_per_token = 4  # (x, y, z, parent_index)
+ args.n_max_bones += 1 # add one for joints
+ else:
+ self.bone_per_token = 6 # (2 joints per bone, xyzxyz)
+ self.max_length = int(args.n_max_bones * self.bone_per_token + 2 + self.cond_length)
+ self.pad_id = -1
+
+ self.coor_continuous_range = (-0.5, 0.5)
+
+ vocab_size = self.n_discrete_size + 3 # 3 for bos, eos, pad
+ self.config = SkeletonOPTConfig.from_pretrained(
+ args.llm,
+ n_positions=self.max_length,
+ max_position_embeddings=self.max_length,
+ vocab_size = vocab_size,
+ _attn_implementation="flash_attention_2"
+ )
+
+ self.bos_token_id = 0
+ self.eos_token_id = 1
+ self.pad_token_id = 2
+
+ self.config.joint_token = self.joint_token
+
+ self.config.bos_token_id = self.bos_token_id
+ self.config.eos_token_id = self.eos_token_id
+ self.config.pad_token_id = self.pad_token_id
+ self.config._attn_implementation ="flash_attention_2"
+ self.config.n_discrete_size = self.n_discrete_size
+ self.config.bone_per_token = self.bone_per_token
+ self.config.cond_length = self.cond_length
+
+ self.config.word_embed_proj_dim = self.config.hidden_size # 1024
+
+ # target-aware indicator
+ if self.args.seq_shuffle:
+ self.feat_dim = self.config.word_embed_proj_dim
+ self.target_aware_pos_embed = nn.Parameter(torch.zeros(1, args.n_max_bones, self.config.word_embed_proj_dim))
+ nn.init.trunc_normal_(self.target_aware_pos_embed, 0., 0.02)
+
+ self.transformer = AutoModelForCausalLM.from_config(
+ config=self.config, attn_implementation="flash_attention_2")
+
+ self.cond_head_proj = nn.Linear(self.cond_dim, self.config.word_embed_proj_dim)
+ self.cond_proj = nn.Linear(self.cond_dim, self.config.word_embed_proj_dim)
+
+ self.eval()
+
+ def detokenize(self, input_ids):
+ # input_ids: torch.Tensor of shape (batch_size, seq_length)
+ batch_size = input_ids.size(0)
+
+ continuous_coors_list = []
+ num_bones_list = []
+
+ for i in range(batch_size):
+ cur_ids = input_ids[i] # Shape: (seq_length,)
+
+ # Remove padding tokens
+ cur_ids = cur_ids[cur_ids != self.pad_id] # Shape: (effective_seq_length,)
+
+ # Check if length is a multiple of 6 (2 joints * 3 coordinates)
+ if cur_ids.numel() % 6 != 0:
+ return None
+ # raise ValueError(f"Invalid length of input_ids in sample {i}. It should be a multiple of 6.")
+
+ num_bones = cur_ids.numel() // 6
+ num_bones_list.append(num_bones)
+
+ # Reshape into (num_bones, 6)
+ bone_coords = cur_ids.view(num_bones, 6) # Shape: (num_bones, 6)
+
+ # Undiscretize the coordinates
+ # Initialize tensor to hold bone coordinates
+ bones_coors = torch.zeros((num_bones, 2, 3), dtype=torch.float16, device=cur_ids.device)
+
+ for j in range(num_bones):
+ bone_coord = bone_coords[j] # Shape: (6,)
+
+ # Split into two joints
+ joint1_ids = bone_coord[:3]
+ joint2_ids = bone_coord[3:]
+
+ # Undiscretize joint coordinates
+ joint1_coords = undiscretize(joint1_ids, self.coor_continuous_range[0], self.coor_continuous_range[1], self.n_discrete_size)
+ joint2_coords = undiscretize(joint2_ids, self.coor_continuous_range[0], self.coor_continuous_range[1], self.n_discrete_size)
+
+ # Assign to bones_coors
+ bones_coors[j, 0, :] = joint1_coords
+ bones_coors[j, 1, :] = joint2_coords
+
+ continuous_coors_list.append(bones_coors)
+
+ max_num_bones = max(num_bones_list)
+
+ # Initialize the continuous_coors tensor with NaNs
+ continuous_coors = torch.full(
+ (batch_size, max_num_bones, 2, 3),
+ float('nan'),
+ dtype=torch.float16,
+ device=input_ids.device
+ )
+
+ # Place the bones_coors into continuous_coors
+ for i in range(batch_size):
+ num_bones = num_bones_list[i]
+ continuous_coors[i, :num_bones, :, :] = continuous_coors_list[i]
+
+ return continuous_coors # Shape: (batch_size, max_num_bones, 2, 3)
+
+ def detokenize_joint_token(self, input_ids):
+ # input_ids: torch.Tensor of shape (batch_size, seq_length)
+ batch_size = input_ids.size(0)
+
+ bones_coors_list = []
+ num_bones_list = []
+
+ for i in range(batch_size):
+ cur_ids = input_ids[i] # Shape: (seq_length,)
+
+ # Remove padding tokens
+ cur_ids = cur_ids[cur_ids != self.pad_id] # Shape: (effective_seq_length,)
+
+ # Check if length is a multiple of 4 (xyz + parent index)
+ if cur_ids.numel() % 4 != 0:
+ return None
+
+ num_joints = cur_ids.numel() // 4
+
+ # Reshape into (num_joints, 4)
+ joint_data = cur_ids.view(num_joints, 4)
+
+ # Undiscretize the coordinates
+ coords_discrete = joint_data[:, :3] # shape: (num_joints, 3)
+ coords_float = undiscretize(
+ coords_discrete,
+ self.coor_continuous_range[0],
+ self.coor_continuous_range[1],
+ self.n_discrete_size
+ )
+ parents = joint_data[:, 3]
+
+ ### recover bones
+ bone_coords = []
+ for child_idx in range(num_joints):
+ p = parents[child_idx].item()
+ if p > 0:
+ try:
+ parent_idx = p - 1
+ parent_coord = coords_float[parent_idx]
+ child_coord = coords_float[child_idx]
+ bone_coords.append([parent_coord, child_coord])
+                    except Exception:  # malformed parent index
+                        return None
+ try:
+ bone_coords = torch.stack(
+ [torch.stack(pair, dim=0) for pair in bone_coords],
+ dim=0
+ ) # shape: (num_bones, 2, 3)
+            except Exception:  # no valid bones could be stacked
+                return None
+ bones_coors_list.append(bone_coords)
+ num_bones_list.append(bone_coords.size(0))
+
+ max_num_bones = max(num_bones_list)
+
+ # Initialize the continuous_coors tensor with NaNs
+ continuous_coors = torch.full(
+ (batch_size, max_num_bones, 2, 3),
+ float('nan'),
+ dtype=torch.float16,
+ device=input_ids.device
+ )
+
+ # Place the bones_coors into continuous_coors
+ for i in range(batch_size):
+ num_bones = num_bones_list[i]
+ continuous_coors[i, :num_bones, :, :] = bones_coors_list[i]
+
+ return continuous_coors # Shape: (batch_size, max_num_bones, 2, 3)
+
+ # def forward(self, data_dict: dict, is_eval: bool = False) -> dict:
+ # return self.generate(data_dict)
+
+ def process_point_feature(self, point_feature):
+
+ encode_feature = torch.zeros(self.args.batchsize_per_gpu, self.cond_length, self.config.word_embed_proj_dim,
+ device=self.cond_head_proj.weight.device, dtype=self.cond_head_proj.weight.dtype)
+ encode_feature[:, 0] = self.cond_head_proj(point_feature[:, 0])
+ shape_latents = self.point_encoder.to_shape_latents(point_feature[:, 1:])
+
+ encode_feature[:, 1:] = self.cond_proj(shape_latents)
+
+ return encode_feature
+
+ @torch.no_grad()
+ def generate(self, data_dict) -> dict:
+
+ point_feature = self.point_encoder.encode_latents(data_dict["pc_normal"])
+ processed_point_feature = self.process_point_feature(point_feature=point_feature)
+ generate_length = self.max_length - self.cond_length
+ net_device = next(self.parameters()).device
+ outputs = torch.ones(self.args.batchsize_per_gpu, generate_length).long().to(net_device) * self.eos_token_id
+
+ if self.args.seq_shuffle:
+ num_joint_token = self.max_length - 2 - self.cond_length # During inference, this is the total length to generate
+ num_joints = num_joint_token // self.bone_per_token
+ target_aware_pos_embed = self.target_aware_pos_embed.repeat(self.args.batchsize_per_gpu, 1, 1) # [B, max_joint, embed_dim]
+
+ cond_pos_embed = target_aware_pos_embed[:, 0:1, :] # [B, 1, embed_dim]
+ cond_pos_embed = cond_pos_embed.repeat(1, self.cond_length, 1) # [B, cond_length, embed_dim]
+ bone_pos_embed = target_aware_pos_embed[:, 1:num_joints, :] # [B, num_joints-1, embed_dim]
+ bone_pos_embed_expanded = bone_pos_embed.unsqueeze(2).repeat(1, 1, self.bone_per_token, 1) # [B, num_joints-1, joint_per_token, embed_dim]
+ bone_pos_embed_expanded = bone_pos_embed_expanded.view(self.args.batchsize_per_gpu, num_joint_token-self.bone_per_token, self.feat_dim)
+ processed_point_feature += cond_pos_embed
+ else:
+ bone_pos_embed_expanded = None
+
+ # batch x ntokens
+ if self.args.num_beams is not None and "pc_normal" in data_dict:
+ results = self.transformer.generate(
+ inputs_embeds=processed_point_feature,
+                max_new_tokens=generate_length,  # all bone tokens plus two (bos/eos)
+ num_beams=self.args.num_beams,
+ bos_token_id=self.bos_token_id,
+ eos_token_id=self.eos_token_id,
+ pad_token_id=self.pad_token_id,
+ target_pos_embed=bone_pos_embed_expanded
+ )
+ else:
+ results = self.transformer.generate(
+                inputs_embeds=processed_point_feature,
+                max_new_tokens=generate_length,  # all bone tokens plus two (bos/eos)
+                do_sample=True,
+                top_k=50,
+                top_p=0.95,
+                bos_token_id=self.bos_token_id,
+                eos_token_id=self.eos_token_id,
+                pad_token_id=self.pad_token_id,
+ )
+ assert results.shape[1] <= generate_length # B x ID bos is not included since it's predicted
+ outputs[:, :results.shape[1]] = results
+ # batch x ntokens ====> batch x ntokens x D
+ outputs = outputs[:, 1: -1] # eos and bos removed
+
+ outputs[outputs == self.bos_token_id] = self.pad_id
+ outputs[outputs == self.eos_token_id] = self.pad_id
+ outputs[outputs == self.pad_token_id] = self.pad_id
+
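+        # Ids 0-2 are reserved for bos/eos/pad, so shifting the remaining ids by 3 recovers the discrete coordinate bins in [0, n_discrete_size).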
+ outputs[outputs != self.pad_id] -= 3
+
+ if self.joint_token:
+ gen_joints = self.detokenize_joint_token(outputs)
+ else:
+ gen_joints = self.detokenize(outputs)
+
+ return gen_joints
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/utils/eval_utils.py b/third_party/Puppeteer/skeleton/utils/eval_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3326bd50fb90df01ff91e427cd596950abda2f28
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/utils/eval_utils.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2007 Free Software Foundation, Inc.
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: GNU General Public License v3.0
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under GNU General Public License v3.0, with the full license text
+# available at https://github.com/zhan-xu/RigNet/blob/master/LICENSE-GPLv3.txt.
+#
+# This modified file is released under the same license.
+
+import numpy as np
+
+##### for quantitative calculation
+def chamfer_dist(pt1, pt2):
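+    # Symmetric Chamfer distance: average of the two one-way mean nearest-neighbour distances between pt1 and pt2.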
+ pt1 = pt1[np.newaxis, :, :]
+ pt2 = pt2[:, np.newaxis, :]
+ dist = np.sqrt(np.sum((pt1 - pt2) ** 2, axis=2))
+ min_left = np.mean(np.min(dist, axis=0))
+ min_right = np.mean(np.min(dist, axis=1))
+ return (min_left + min_right) / 2
+
+def oneway_chamfer(pt_src, pt_dst):
+ pt1 = pt_src[np.newaxis, :, :]
+ pt2 = pt_dst[:, np.newaxis, :]
+ dist = np.sqrt(np.sum((pt1 - pt2) ** 2, axis=2))
+ avg_dist = np.mean(np.min(dist, axis=0))
+ return avg_dist
+
+def joint2bone_chamfer_dist(joints1, bones1, joints2, bones2):
+ bone_sample_1 = sample_skel(joints1, bones1)
+ bone_sample_2 = sample_skel(joints2, bones2)
+ dist1 = oneway_chamfer(joints1, bone_sample_2)
+ dist2 = oneway_chamfer(joints2, bone_sample_1)
+ return (dist1 + dist2) / 2
+
+def bone2bone_chamfer_dist(joints1, bones1, joints2, bones2):
+ bone_sample_1 = sample_skel(joints1, bones1)
+ bone_sample_2 = sample_skel(joints2, bones2)
+ return chamfer_dist(bone_sample_1, bone_sample_2)
+
+def sample_bone(p_pos, ch_pos):
+ ray = ch_pos - p_pos
+
+ bone_length = np.linalg.norm(p_pos - ch_pos)
+ num_step = np.round(bone_length / 0.005).astype(int)
+ i_step = np.arange(0, num_step + 1)
+ unit_step = ray / (num_step + 1e-30)
+ unit_step = np.repeat(unit_step[np.newaxis, :], num_step + 1, axis=0)
+ res = p_pos + unit_step * i_step[:, np.newaxis]
+ return res
+
+def sample_skel(joints, bones):
+ bone_sample = []
+ for parent_idx, child_idx in bones:
+ p_pos = joints[parent_idx]
+ ch_pos = joints[child_idx]
+ res = sample_bone(p_pos, ch_pos)
+ bone_sample.append(res)
+
+ if bone_sample:
+ bone_sample = np.concatenate(bone_sample, axis=0)
+ else:
+ bone_sample = np.empty((0, 3))
+
+ return bone_sample
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/utils/mesh_to_pc.py b/third_party/Puppeteer/skeleton/utils/mesh_to_pc.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e21676bd1dda454c52f7c348181771f94678183
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/utils/mesh_to_pc.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023 S-Lab
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: S-Lab License 1.0
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under S-Lab License 1.0, with the full license text
+# available at https://github.com/buaacyw/MeshAnything/blob/main/LICENSE.txt.
+#
+# This modified file is released under the same license.
+
+import mesh2sdf.core
+import numpy as np
+import skimage.measure
+import trimesh
+import time
+from typing import List, Tuple
+
+class MeshProcessor:
+ """A class to handle mesh normalization, watertight conversion and point cloud sampling."""
+
+ @staticmethod
+ def normalize_mesh_vertices(vertices: np.ndarray, scaling_factor: float = 0.95) -> Tuple[np.ndarray, np.ndarray, float]:
+ """
+ Normalize mesh vertices to be centered at origin and scaled appropriately.
+ """
+ min_bounds = vertices.min(axis=0)
+ max_bounds = vertices.max(axis=0)
+
+ center = (min_bounds + max_bounds) * 0.5
+ max_dimension = (max_bounds - min_bounds).max()
+ scale = 2.0 * scaling_factor / max_dimension
+
+ normalized_vertices = (vertices - center) * scale
+ return normalized_vertices, center, scale
+
+ @staticmethod
+ def convert_to_watertight(mesh: trimesh.Trimesh, octree_depth: int = 7) -> trimesh.Trimesh:
+ """
+ Convert to watertight using mesh2sdf and marching cubes.
+ """
+ grid_size = 2 ** octree_depth
+ iso_level = 2 / grid_size
+
+ # Normalize vertices for SDF computation
+ normalized_vertices, original_center, original_scale = MeshProcessor.normalize_mesh_vertices(mesh.vertices)
+
+ # Compute signed distance field
+ sdf = mesh2sdf.core.compute(normalized_vertices, mesh.faces, size=grid_size)
+
+ # Run marching cubes algorithm
+ vertices, faces, normals, _ = skimage.measure.marching_cubes(np.abs(sdf), iso_level)
+
+ # Transform vertices back to original coordinate system
+ vertices = vertices / grid_size * 2 - 1 # Map to [-1, 1] range
+ vertices = vertices / original_scale + original_center
+
+ # Create new watertight mesh
+ watertight_mesh = trimesh.Trimesh(vertices, faces, normals=normals)
+ return watertight_mesh
+
+ @staticmethod
+ def convert_meshes_to_point_clouds(
+ meshes: List[trimesh.Trimesh],
+ points_per_mesh: int = 8192,
+ apply_marching_cubes: bool = False,
+ octree_depth: int = 7
+ ) -> List[np.ndarray]:
+ """
+ Process a list of meshes into point clouds with normals.
+ """
+ point_clouds_with_normals = []
+ processed_meshes = []
+
+ for mesh in meshes:
+ # Optionally convert to watertight mesh
+ if apply_marching_cubes:
+ start_time = time.time()
+ mesh = MeshProcessor.convert_to_watertight(mesh, octree_depth=octree_depth)
+ processing_time = time.time() - start_time
+ print(f"Marching cubes complete! Time: {processing_time:.2f}s")
+
+ # Store processed mesh
+ processed_meshes.append(mesh)
+
+ # Sample points and get corresponding face normals
+ points, face_indices = mesh.sample(points_per_mesh, return_index=True)
+ point_normals = mesh.face_normals[face_indices]
+
+ # Combine points and normals
+ points_with_normals = np.concatenate([points, point_normals], axis=-1, dtype=np.float16)
+ point_clouds_with_normals.append(points_with_normals)
+
+ return point_clouds_with_normals
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/utils/save_utils.py b/third_party/Puppeteer/skeleton/utils/save_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8030029f74de64cba213181270598f1ffa76921e
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/utils/save_utils.py
@@ -0,0 +1,504 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import numpy as np
+import cv2
+import json
+import trimesh
+
+from collections import deque, defaultdict
+from scipy.cluster.hierarchy import linkage, fcluster
+
+from data_utils.pyrender_wrapper import PyRenderWrapper
+from data_utils.data_loader import DataLoader
+
+def save_mesh(vertices, faces, filename):
+
+ mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
+ mesh.export(filename, file_type='obj')
+
+def pred_joints_and_bones(bone_coor):
+ """
+    Get joints (j, 3) and bones (b, 2) from bone coordinates of shape (b, 2, 3), preserving the parent-child relationship.
+ """
+ parent_coords = bone_coor[:, 0, :] # (b, 3)
+ child_coords = bone_coor[:, 1, :] # (b, 3)
+
+ all_coords = np.vstack([parent_coords, child_coords]) # (2b, 3)
+ pred_joints, indices = np.unique(all_coords, axis=0, return_inverse=True)
+
+ b = bone_coor.shape[0]
+ parent_indices = indices[:b]
+ child_indices = indices[b:]
+
+ pred_bones = np.column_stack([parent_indices, child_indices])
+
+ valid_bones = pred_bones[parent_indices != child_indices]
+
+ return pred_joints, valid_bones
+
+
+
+def merge_duplicate_joints_and_fix_bones(joints, bones, tolerance=0.0025, root_index=None):
+ """
+ merge duplicate joints that are within a certain tolerance distance, and fix bones to maintain connectivity.
+ Also merge bones that become duplicates after joint merging.
+ """
+ n_joints = len(joints)
+
+ # find merge joint groups
+ merge_groups = []
+ used = [False] * n_joints
+
+ for i in range(n_joints):
+ if used[i]:
+ continue
+
+ # find all joints within tolerance distance to joint i
+ group = [i]
+ for j in range(i + 1, n_joints):
+ if not used[j]:
+ dist = np.linalg.norm(joints[i] - joints[j])
+ if dist < tolerance:
+ group.append(j)
+ used[j] = True
+
+ used[i] = True
+ merge_groups.append(group)
+
+ # if len(group) > 1:
+ # print(f"find duplicate joints group: {group}")
+
+ # build merge map: choose representative joint
+ merge_map = {}
+ for group in merge_groups:
+ if root_index is not None and root_index in group:
+ representative = root_index
+ else:
+ representative = group[0] # else choose the first one as representative
+ for joint_idx in group:
+ merge_map[joint_idx] = representative
+
+ # track root joint change
+ intermediate_root_index = None
+ if root_index is not None:
+ intermediate_root_index = merge_map.get(root_index, root_index)
+ # if intermediate_root_index != root_index:
+ # print(f"root joint index changed from {root_index} to {intermediate_root_index}")
+
+ # update bones: remove self-loop bones, and merge duplicate bones
+ updated_bones = []
+
+ for parent, child in bones:
+ new_parent = merge_map.get(parent, parent)
+ new_child = merge_map.get(child, child)
+
+ if new_parent != new_child: # remove self-loop bones
+ updated_bones.append([new_parent, new_child])
+
+ # remove duplicate bones
+ unique_bones = []
+ seen_bones = set()
+
+ for bone in updated_bones:
+ bone_key = tuple(bone) # keep the order of [parent, child]
+ if bone_key not in seen_bones:
+ seen_bones.add(bone_key)
+ unique_bones.append(bone)
+
+ # re-index joints to remove unused joints
+ used_joint_indices = set()
+ for parent, child in unique_bones:
+ used_joint_indices.add(parent)
+ used_joint_indices.add(child)
+ if intermediate_root_index is not None:
+ used_joint_indices.add(intermediate_root_index)
+
+
+ used_joint_indices = sorted(list(used_joint_indices))
+
+ # new index for used joints
+ old_to_new = {old_idx: new_idx for new_idx, old_idx in enumerate(used_joint_indices)}
+
+ final_joints = joints[used_joint_indices]
+ final_bones = np.array([[old_to_new[parent], old_to_new[child]]
+ for parent, child in unique_bones])
+
+ final_root_index = None
+ if intermediate_root_index is not None:
+ final_root_index = old_to_new[intermediate_root_index]
+ if root_index is not None and final_root_index != root_index:
+ print(f"final root index: {root_index} -> {final_root_index}")
+
+ removed_joints = n_joints - len(final_joints)
+ removed_bones = len(bones) - len(final_bones)
+
+ # print
+ # if removed_joints > 0 or removed_bones > 0:
+ # print(f"merge results:")
+ # print(f" joint number: {n_joints} -> {len(final_joints)} (remove {removed_joints})")
+ # print(f" bone number: {len(bones)} -> {len(final_bones)} (remove {removed_bones})")
+
+ if root_index is not None:
+ return final_joints, final_bones, final_root_index
+ else:
+ return final_joints, final_bones
+
+def save_skeleton_to_txt(pred_joints, pred_bones, pred_root_index, hier_order, vertices, filename='skeleton.txt'):
+ """
+    Save the skeleton to a txt file in the RigNet format (joints, root, hier).
+
+    If hier_order is set, pred_root_index is used as the root and the parent-child relationship is taken
+    directly from the bone list; otherwise the joint nearest to the mesh centroid becomes the root and the
+    hierarchy is built from it by BFS over bone connectivity.
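+
+    Output lines look like:
+        joints joint0 0.10000000 0.20000000 0.30000000
+        root joint0
+        hier joint0 joint1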
+ """
+
+ num_joints = pred_joints.shape[0]
+
+ # assign joint names
+ joint_names = [f'joint{i}' for i in range(num_joints)]
+
+ adjacency = defaultdict(list)
+ for bone in pred_bones:
+ idx_a, idx_b = bone
+ adjacency[idx_a].append(idx_b)
+ adjacency[idx_b].append(idx_a)
+
+ # find root joint
+ if hier_order:
+ root_idx = pred_root_index
+ else:
+ centroid = np.mean(vertices, axis=0)
+ distances = np.linalg.norm(pred_joints - centroid, axis=1)
+ root_idx = np.argmin(distances)
+
+ root_name = joint_names[root_idx]
+
+ # build hierarchy
+ parent_map = {}
+
+ if hier_order:
+ visited = set()
+
+ for parent_idx, child_idx in pred_bones:
+ if child_idx not in parent_map:
+ parent_map[child_idx] = parent_idx
+ visited.add(child_idx)
+ visited.add(parent_idx)
+
+ parent_map[root_idx] = None
+
+ else:
+ visited = set([root_idx])
+ queue = deque([root_idx])
+ parent_map[root_idx] = None
+
+ while queue:
+ current_idx = queue.popleft()
+ for neighbor_idx in adjacency[current_idx]:
+ if neighbor_idx not in visited:
+ parent_map[neighbor_idx] = current_idx
+ visited.add(neighbor_idx)
+ queue.append(neighbor_idx)
+
+ if len(visited) != num_joints:
+ print(f"bones are not fully connected, leaving {num_joints - len(visited)} joints unconnected.")
+
+ # save joints
+ joints_lines = []
+ for idx, coord in enumerate(pred_joints):
+ name = joint_names[idx]
+ joints_line = f'joints {name} {coord[0]:.8f} {coord[1]:.8f} {coord[2]:.8f}'
+ joints_lines.append(joints_line)
+
+ # save root name
+ root_line = f'root {root_name}'
+
+ # save hierarchy
+ hier_lines = []
+ for child_idx, parent_idx in parent_map.items():
+ if parent_idx is not None:
+ parent_name = joint_names[parent_idx]
+ child_name = joint_names[child_idx]
+ hier_line = f'hier {parent_name} {child_name}'
+ hier_lines.append(hier_line)
+
+ with open(filename, 'w') as file:
+ for line in joints_lines:
+ file.write(line + '\n')
+
+ file.write(root_line + '\n')
+
+ for line in hier_lines:
+ file.write(line + '\n')
+
+def save_skeleton_to_txt_joint(pred_joints, pred_bones, filename='skeleton.txt'):
+ """
+    Save the skeleton to a txt file in the RigNet format (joints, root, hier).
+ """
+
+ num_joints = pred_joints.shape[0]
+
+ # assign joint names
+ joint_names = [f'joint{i}' for i in range(num_joints)]
+
+ # find potential root joints
+ all_parents = set([bone[0] for bone in pred_bones])
+ all_children = set([bone[1] for bone in pred_bones])
+ potential_roots = all_parents - all_children
+
+ # determine root joint
+ if not potential_roots:
+ print("Warning: No joint is only a parent, choosing the first joint as root.")
+ root_idx = pred_bones[0, 0]
+ else:
+ if len(potential_roots) > 1:
+ print(f"Warning: Multiple potential root joints found ({len(potential_roots)}), choosing the first one.")
+ root_idx = list(potential_roots)[0]
+
+ root_name = joint_names[root_idx]
+
+ # build hierarchy
+ parent_map = {}
+ visited = set()
+
+ for parent_idx, child_idx in pred_bones:
+ if child_idx not in parent_map:
+ parent_map[child_idx] = parent_idx
+ visited.add(child_idx)
+ visited.add(parent_idx)
+
+ parent_map[root_idx] = None
+
+ if len(visited) != num_joints:
+ print(f"Warning: bones are not fully connected, leaving {num_joints - len(visited)} joints unconnected.")
+
+ # save joints
+ joints_lines = []
+ for idx, coord in enumerate(pred_joints):
+ name = joint_names[idx]
+ joints_line = f'joints {name} {coord[0]:.8f} {coord[1]:.8f} {coord[2]:.8f}'
+ joints_lines.append(joints_line)
+
+ # save root name
+ root_line = f'root {root_name}'
+
+ # save hierarchy
+ hier_lines = []
+ for child_idx, parent_idx in parent_map.items():
+ if parent_idx is not None:
+ parent_name = joint_names[parent_idx]
+ child_name = joint_names[child_idx]
+ hier_line = f'hier {parent_name} {child_name}'
+ hier_lines.append(hier_line)
+
+ with open(filename, 'w') as file:
+ for line in joints_lines:
+ file.write(line + '\n')
+
+ file.write(root_line + '\n')
+
+ for line in hier_lines:
+ file.write(line + '\n')
+ return root_idx
+
+
+def save_skeleton_obj(joints, bones, save_path, root_index=None, radius_sphere=0.01,
+ radius_bone=0.005, segments=16, stacks=16, use_cone=False):
+ """
+    Save the skeleton to an obj file; each connection is drawn as two red joint spheres and one blue bone cylinder.
+    If the root index is known, the root sphere is colored green.
+ """
+
+ all_vertices = []
+ all_colors = []
+ all_faces = []
+ vertex_offset = 0
+
+ # create spheres for joints
+ for i, joint in enumerate(joints):
+ # define color
+ if root_index is not None and i == root_index:
+ color = (0, 1, 0) # green for root joint
+ else:
+ color = (1, 0, 0) # red for other joints
+
+ # create joint sphere
+ sphere_vertices, sphere_faces = create_sphere(joint, radius=radius_sphere, segments=segments, stacks=stacks)
+ all_vertices.extend(sphere_vertices)
+ all_colors.extend([color] * len(sphere_vertices))
+
+ # adjust face index
+ adjusted_sphere_faces = [(v1 + vertex_offset, v2 + vertex_offset, v3 + vertex_offset) for (v1, v2, v3) in sphere_faces]
+ all_faces.extend(adjusted_sphere_faces)
+ vertex_offset += len(sphere_vertices)
+
+ # create bones
+ for bone in bones:
+ parent_idx, child_idx = bone
+ parent = joints[parent_idx]
+ child = joints[child_idx]
+
+ try:
+ bone_vertices, bone_faces = create_bone(parent, child, radius=radius_bone, segments=segments, use_cone=use_cone)
+ except ValueError as e:
+ print(f"Skipping connection {parent_idx}-{child_idx}, reason: {e}")
+ continue
+
+ all_vertices.extend(bone_vertices)
+ all_colors.extend([(0, 0, 1)] * len(bone_vertices)) # blue
+
+ # adjust face index
+ adjusted_bone_faces = [(v1 + vertex_offset, v2 + vertex_offset, v3 + vertex_offset) for (v1, v2, v3) in bone_faces]
+ all_faces.extend(adjusted_bone_faces)
+ vertex_offset += len(bone_vertices)
+
+ # save to obj
+ obj_lines = []
+ for v, c in zip(all_vertices, all_colors):
+ obj_lines.append(f"v {v[0]} {v[1]} {v[2]} {c[0]} {c[1]} {c[2]}")
+ obj_lines.append("")
+
+ for face in all_faces:
+ obj_lines.append(f"f {face[0]} {face[1]} {face[2]}")
+
+ with open(save_path, 'w') as obj_file:
+ obj_file.write("\n".join(obj_lines))
+
+def create_sphere(center, radius=0.01, segments=16, stacks=16):
+ vertices = []
+ faces = []
+ for i in range(stacks + 1):
+ lat = np.pi / 2 - i * np.pi / stacks
+ xy = radius * np.cos(lat)
+ z = radius * np.sin(lat)
+ for j in range(segments):
+ lon = j * 2 * np.pi / segments
+ x = xy * np.cos(lon) + center[0]
+ y = xy * np.sin(lon) + center[1]
+ vertices.append((x, y, z + center[2]))
+ for i in range(stacks):
+ for j in range(segments):
+ first = i * segments + j
+ second = first + segments
+ third = first + 1 if (j + 1) < segments else i * segments
+ fourth = second + 1 if (j + 1) < segments else (i + 1) * segments
+ faces.append((first + 1, second + 1, fourth + 1))
+ faces.append((first + 1, fourth + 1, third + 1))
+ return vertices, faces
+
+def create_bone(start, end, radius=0.005, segments=16, use_cone=False):
+ dir_vector = np.array(end) - np.array(start)
+ height = np.linalg.norm(dir_vector)
+ if height == 0:
+ raise ValueError("Start and end points cannot be the same for a cone.")
+ dir_vector = dir_vector / height
+
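+    # Build a rotation matrix aligning the +z axis with the bone direction (rotation-between-vectors, Rodrigues form).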
+ z = np.array([0, 0, 1])
+ if np.allclose(dir_vector, z):
+ R = np.identity(3)
+ elif np.allclose(dir_vector, -z):
+ R = np.array([[-1,0,0],[0,-1,0],[0,0,1]])
+ else:
+ v = np.cross(z, dir_vector)
+ s = np.linalg.norm(v)
+ c = np.dot(z, dir_vector)
+ kmat = np.array([[0, -v[2], v[1]],
+ [v[2], 0, -v[0]],
+ [-v[1], v[0], 0]])
+ R = np.identity(3) + kmat + np.matmul(kmat, kmat) * ((1 - c) / (s**2))
+
+ theta = np.linspace(0, 2 * np.pi, segments, endpoint=False)
+ base_circle = np.array([np.cos(theta), np.sin(theta), np.zeros(segments)]) * radius
+
+ vertices = []
+ for point in base_circle.T:
+ rotated = np.dot(R, point) + np.array(start)
+ vertices.append(tuple(rotated))
+
+
+ faces = []
+
+ if use_cone:
+ vertices.append(tuple(end))
+
+ apex_idx = segments + 1
+ for i in range(segments):
+ next_i = (i + 1) % segments
+ faces.append((i + 1, next_i + 1, apex_idx))
+ else:
+ top_circle = np.array([np.cos(theta), np.sin(theta), np.ones(segments)]) * radius
+ for point in top_circle.T:
+ point_scaled = np.array([point[0], point[1], height])
+ rotated = np.dot(R, point_scaled) + np.array(start)
+ vertices.append(tuple(rotated))
+ for i in range(segments):
+ next_i = (i + 1) % segments
+ faces.append((i + 1, next_i + 1, next_i + segments + 1))
+ faces.append((i + 1, next_i + segments + 1, i + segments + 1))
+
+ return vertices, faces
+
+def render_mesh_with_skeleton(joints, bones, vertices, faces, output_dir, filename, prefix='pred', root_idx=None):
+ """
+ Render the mesh with skeleton using PyRender.
+ """
+ loader = DataLoader()
+
+ raw_size = (960, 960)
+ renderer = PyRenderWrapper(raw_size)
+
+ save_dir = os.path.join(output_dir, 'render_results')
+ os.makedirs(save_dir, exist_ok=True)
+
+ loader.joints = joints
+ loader.bones = bones
+ loader.root_idx = root_idx
+
+ mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
+ mesh.visual.vertex_colors[:, 3] = 100 # set transparency
+ loader.mesh = mesh
+ v = mesh.vertices
+ xmin, ymin, zmin = v.min(axis=0)
+ xmax, ymax, zmax = v.max(axis=0)
+ loader.bbox_center = np.array([(xmax + xmin)/2, (ymax + ymin)/2, (zmax + zmin)/2])
+ loader.bbox_size = np.array([xmax - xmin, ymax - ymin, zmax - zmin])
+ loader.bbox_scale = max(xmax - xmin, ymax - ymin, zmax - zmin)
+ loader.normalize_coordinates()
+
+ input_dict = loader.query_mesh_rig()
+
+ angles = [0, np.pi/2, np.pi, 3*np.pi/2]
+ distance = np.max(loader.bbox_size) * 2
+
+ subfolder_path = os.path.join(save_dir, filename + '_' + prefix)
+
+ os.makedirs(subfolder_path, exist_ok=True)
+
+ for i, angle in enumerate(angles):
+ renderer.set_camera_view(angle, loader.bbox_center, distance)
+ renderer.align_light_to_camera()
+
+ color = renderer.render(input_dict)[0]
+
+ output_filename = f"{filename}_{prefix}_view{i+1}.png"
+ output_filepath = os.path.join(subfolder_path, output_filename)
+ cv2.imwrite(output_filepath, color)
+
+
+def save_args(args, output_dir, filename="config.json"):
+ args_dict = vars(args)
+ os.makedirs(output_dir, exist_ok=True)
+ config_path = os.path.join(output_dir, filename)
+ with open(config_path, 'w') as f:
+ json.dump(args_dict, f, indent=4)
\ No newline at end of file
diff --git a/third_party/Puppeteer/skeleton/utils/skeleton_data_loader.py b/third_party/Puppeteer/skeleton/utils/skeleton_data_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4cb10f226ba2c9698ee78df91f5e211bcf31588
--- /dev/null
+++ b/third_party/Puppeteer/skeleton/utils/skeleton_data_loader.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch import is_tensor
+from torch.utils.data import Dataset
+from torch.nn.utils.rnn import pad_sequence
+from data_utils.save_npz import normalize_to_unit_cube
+
+import numpy as np
+
+class SkeletonData(Dataset):
+ """
+ A PyTorch Dataset to load and process skeleton data.
+ """
+ def __init__(self, data, args, is_training):
+ self.data = data
+
+ self.input_pc_num = args.input_pc_num
+ self.is_training = is_training
+
+ self.hier_order = args.hier_order
+ print(f"[Dataset] Created from {len(self.data)} entries")
+
+ def __len__(self):
+ return len(self.data)
+
+ def __getitem__(self, idx):
+ data = self.data[idx]
+
+ joints = data['joints']
+ vertices = data['vertices']
+ pc_normal = data['pc_w_norm']
+
+ indices = np.random.choice(pc_normal.shape[0], self.input_pc_num, replace=False)
+ pc_normal = pc_normal[indices, :]
+
+ pc_coor = pc_normal[:, :3]
+ normal = pc_normal[:, 3:]
+ if np.linalg.norm(normal, axis=1, keepdims=True).min() < 0.99:
+ print("normal reroll")
+ return self.__getitem__(np.random.randint(0, len(self.data)))
+
+ data_dict = {}
+
+ # normalize normal
+ normal = normal / np.linalg.norm(normal, axis=1, keepdims=True)
+
+ # scale to -0.5 to 0.5
+ _, center, scale = normalize_to_unit_cube(vertices.copy(), scale_factor=0.9995)
+ joints = (joints - center) * scale # align joints with pc first
+
+ bounds = np.array([pc_coor.min(axis=0), pc_coor.max(axis=0)])
+ pc_center = (bounds[0] + bounds[1])[None, :] / 2
+ pc_scale = (bounds[1] - bounds[0]).max() + 1e-5
+ pc_coor = (pc_coor - pc_center) / pc_scale
+ joints = (joints - pc_center) / pc_scale
+
+ joints = joints.clip(-0.5, 0.5)
+
+ data_dict['joints'] = torch.from_numpy(np.asarray(joints).astype(np.float16))
+ data_dict['bones'] = torch.from_numpy(data['bones'].astype(np.int64))
+ pc_coor = pc_coor / np.abs(pc_coor).max() * 0.9995
+ data_dict['pc_normal'] = torch.from_numpy(np.concatenate([pc_coor, normal], axis=-1).astype(np.float16))
+ data_dict['vertices'] = torch.from_numpy(data['vertices'].astype(np.float16))
+ data_dict['faces'] = torch.from_numpy(data['faces'].astype(np.int64))
+ data_dict['uuid'] = data['uuid']
+ data_dict['root_index'] = str(data['root_index'])
+ data_dict['transform_params'] = torch.tensor([
+ center[0], center[1], center[2],
+ scale,
+ pc_center[0][0], pc_center[0][1], pc_center[0][2],
+ pc_scale
+ ], dtype=torch.float32)
+
+ return data_dict
+
+ @classmethod
+ def load(cls, args, is_training=True):
+ loaded_data = np.load(args.dataset_path, allow_pickle=True)
+ data = []
+ for item in loaded_data["arr_0"]:
+ data.append(item)
+ print(f"[Dataset] Loaded {len(data)} entries")
+ return cls(data, args, is_training)
+
+
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/README.md b/third_party/Puppeteer/skinning/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cebfcfb62a1d681b576791cf4580f1aa19c6aec3
--- /dev/null
+++ b/third_party/Puppeteer/skinning/README.md
@@ -0,0 +1,47 @@
+# Skinning Weight Prediction
+This folder provides the skinning weight prediction implementation and scripts to evaluate the paper’s metrics on three test sets. You can also run inference on your own 3D objects.
+
+## Weights Download
+First download [checkpoints of PartField](https://huggingface.co/mikaelaangel/partfield-ckpt) and our [released weights](https://huggingface.co/Seed3D/Puppeteer) for skinning weight prediction:
+
+```
+ln -s ../../skeleton/third_partys/Michelangelo third_partys/Michelangelo
+python download.py
+```
+
+## Evaluation
+
+To reproduce our evaluations, run the following command on `Articulation-XL2.0-test`, `ModelResource-test` and `Diverse-pose-test`. The test sets are available [here](https://drive.google.com/drive/folders/1zIAcg1sAJtVemMKybZEMPnUzKXDST_dX?usp=sharing); we preprocess the released NPZ files and save them as h5 files (see `utils/save_h5.py` for how they are generated). The inference process requires 4.2 GB of VRAM.
+
+```
+bash eval.sh
+```
+
+We save the skinning weights as `.npy` files by passing `--save_skin_npy`.
+
+## Demo
+
+Given meshes and skeletons, we can predict skinning weights by running:
+
+```
+bash demo.sh
+```
+
+For inputs, place mesh `.obj` files in the directory specified by `--mesh_folder` and rig `.txt` files in `--input_skel_folder`. Each mesh and its rig must share the same filename. The rig files should follow the RigNet format, containing lines of the form:
+
+```
+joints [joint_name] [x] [y] [z]
+root [root_joint_name]
+hier [parent_joint_name] [child_joint_name]
+```
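+
+The sketch below (not part of this repository) shows one way such a rig file could be parsed in Python; the helper name `load_rig_txt` is hypothetical:
+
+```
+def load_rig_txt(path):
+    """Parse a RigNet-format rig file into joint coordinates, the root name and hierarchy edges."""
+    joints, root, hier = {}, None, []
+    with open(path) as f:
+        for line in f:
+            parts = line.split()
+            if not parts:
+                continue
+            if parts[0] == "joints":   # joints <name> <x> <y> <z>
+                joints[parts[1]] = [float(v) for v in parts[2:5]]
+            elif parts[0] == "root":   # root <root_joint_name>
+                root = parts[1]
+            elif parts[0] == "hier":   # hier <parent_name> <child_name>
+                hier.append((parts[1], parts[2]))
+    return joints, root, hier
+```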
+
+If you are using GLB files, refer to `skeleton/data_utils/read_rig_mesh_from_glb.py` for reading the mesh and rig. After predicting skinning weights, the final rig files are saved with additional skinning lines:
+
+```
+skin [vertex_index] [joints_name1] [skinning_weight1] [joints_name2] [skinning_weight2] ...
+```
+
+⚠️ Note that meshes with complex topology may require more data processing time.
+
+## Visualization
+The skinning visualizations shown in the paper can be reproduced using `utils/visualize.py`. This script generates two types of visualizations: (1) objects with skinning weights represented as colors, and (2) objects with L1 error maps that highlight differences between predicted and ground truth skinning weights.
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/demo.sh b/third_party/Puppeteer/skinning/demo.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0b3b06734b72425b5f667a60e4f719c8f8c4ebe8
--- /dev/null
+++ b/third_party/Puppeteer/skinning/demo.sh
@@ -0,0 +1,9 @@
+CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=10009 main.py \
+ --num_workers 1 --batch_size 1 --generate --save_skin_npy \
+ --pretrained_weights skinning_ckpts/puppeteer_skin_w_diverse_pose_depth1.pth \
+ --input_skel_folder skel_folder \
+ --mesh_folder mesh_folder \
+ --post_filter --depth 1 --save_folder outputs
+
+### We recommend enabling `--post_filter` to smooth skinning weights by averaging the weights of neighboring vertices.
+### If results are unsatisfactory, try increasing `--depth` from 1 to 2 and updating the checkpoint path.
diff --git a/third_party/Puppeteer/skinning/download.py b/third_party/Puppeteer/skinning/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..df6a78977d34b818666989c7a6b51873bf3a7747
--- /dev/null
+++ b/third_party/Puppeteer/skinning/download.py
@@ -0,0 +1,25 @@
+from huggingface_hub import hf_hub_download
+
+file_path = hf_hub_download(
+ repo_id="mikaelaangel/partfield-ckpt",
+ filename="model_objaverse.ckpt",
+ local_dir="third_partys/PartField/ckpt"
+)
+
+file_path = hf_hub_download(
+ repo_id="Seed3D/Puppeteer",
+ filename="skinning_ckpts/puppeteer_skin_w_diverse_pose_depth1.pth",
+ local_dir="skinning"
+)
+
+file_path = hf_hub_download(
+ repo_id="Seed3D/Puppeteer",
+ filename="skinning_ckpts/puppeteer_skin_w_diverse_pose_depth2.pth",
+ local_dir="skinning"
+)
+
+file_path = hf_hub_download(
+ repo_id="Seed3D/Puppeteer",
+ filename="skinning_ckpts/puppeteer_skin_wo_diverse_pose_depth1.pth",
+ local_dir="skinning"
+)
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/eval.sh b/third_party/Puppeteer/skinning/eval.sh
new file mode 100644
index 0000000000000000000000000000000000000000..83706590b0a6f9220a57fe655be60df8cb456460
--- /dev/null
+++ b/third_party/Puppeteer/skinning/eval.sh
@@ -0,0 +1,8 @@
+
+CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=10011 main.py \
+ --num_workers 1 --batch_size 1 --depth 1 --eval \
+ --xl_test --save_skin_npy --save_folder outputs \
+ --eval_data_path articulation_xl2_test.h5 \
+ --pretrained_weights skinning_ckpts/puppeteer_skin_w_diverse_pose_depth1.pth
+
+# Remember to change --eval_data_path and pass the matching flag (--xl_test, --pose_test or --modelres_test) when evaluating.
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/main.py b/third_party/Puppeteer/skinning/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..c89151fb947ba3699d9d856f8660b2794ad2b902
--- /dev/null
+++ b/third_party/Puppeteer/skinning/main.py
@@ -0,0 +1,344 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import numpy as np
+import os
+import time
+from pathlib import Path
+from scipy.spatial import cKDTree
+
+import torch
+import torch.backends.cudnn as cudnn
+import torch.nn.functional as F
+
+torch.set_num_threads(8)
+
+import utils.misc as misc
+import skinning_models.models as models
+from utils.misc import NativeScalerWithGradNormCount as NativeScaler
+from utils.skin_data import SkinData
+from utils.util import save_skin_weights_to_rig, post_filter
+
+def get_args_parser():
+ parser = argparse.ArgumentParser('Autoencoder', add_help=False)
+ parser.add_argument('--batch_size', default=64, type=int,
+                        help='Batch size per GPU (effective batch size is batch_size * accum_iter * # gpus)')
+ parser.add_argument('--lr', default=1e-4, type=float, help='learning rate')
+ parser.add_argument('--model', default='SkinningNetStacked', type=str, metavar='MODEL',
+ help='Name of model to train')
+ parser.add_argument('--device', default='cuda',
+ help='device to use for training / testing')
+ parser.add_argument('--seed', default=0, type=int)
+ parser.add_argument('--num_workers', default=60, type=int)
+ parser.add_argument('--pin_mem', action='store_true',
+ help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
+ parser.add_argument('--no_pin_mem', action='store_false', dest='pin_mem')
+ parser.set_defaults(pin_mem=False)
+
+ # distributed training parameters
+ parser.add_argument('--world_size', default=1, type=int,
+ help='number of distributed processes')
+ parser.add_argument('--local_rank', default=-1, type=int)
+ parser.add_argument('--dist_on_itp', action='store_true')
+ parser.add_argument('--dist_url', default='env://',
+ help='url used to set up distributed training')
+
+    parser.add_argument('--pretrained_weights', default=None, type=str, help='path to pretrained weights')
+ parser.add_argument('--depth', default=1, type=int, help='network depth in transformer')
+ parser.add_argument('--max_joints', default=70, type=int, help='max joints')
+ parser.add_argument('--use_TAJA', action='store_true', default=True, help='whether to use TAJA')
+ parser.add_argument('--save_folder', default="outputs", type=str, help='save folder')
+ parser.add_argument('--save_skin_npy', action='store_true', default=False, help='save skinning weights as npy files')
+
+ # for evaluation
+ parser.add_argument('--eval', action='store_true', help='Perform evaluation only')
+ parser.add_argument('--eval_data_path', default=None, type=str, help='eval dataset path')
+ parser.add_argument('--pose_test', action='store_true', default=False, help='evaluate on diverse pose test set')
+ parser.add_argument('--modelres_test', action='store_true', default=False, help='evaluate on modelresources test set')
+ parser.add_argument('--xl_test', action='store_true', default=False, help='evaluate on articulation-xl test set')
+ parser.add_argument('--filter_thre', default=0.15, type=float, help='filter threshold')
+
+ # for generation
+ parser.add_argument('--generate', action='store_true', default=False, help='Perform inference')
+ parser.add_argument('--input_skel_folder', default=None, type=str, help='input skeleton folder')
+ parser.add_argument('--mesh_folder', default=None, type=str, help='input mesh folder')
+ parser.add_argument('--post_filter', action='store_true', default=False, help='whether to do post filtering')
+
+ return parser
+
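+# Example invocations (illustrative only; the entry-point and file names below are
+# placeholders, not part of the original code):
+#   evaluation: torchrun --nproc_per_node=1 <skinning_entry>.py --eval --xl_test \
+#       --eval_data_path <test_set>.h5 --pretrained_weights <checkpoint>.pth --batch_size 1
+#   generation: torchrun --nproc_per_node=1 <skinning_entry>.py --generate \
+#       --mesh_folder <mesh_dir> --input_skel_folder <skeleton_dir> \
+#       --pretrained_weights <checkpoint>.pth --save_folder outputs --post_filter
+# A torchrun-style launcher supplies the RANK/WORLD_SIZE/LOCAL_RANK variables that
+# init_distributed_mode expects; --batch_size 1 is typically needed because meshes with
+# different vertex counts cannot be collated into larger batches by the default DataLoader.
+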
+@torch.no_grad()
+def evaluate(data_loader, model, device, args):
+ model.eval()
+
+ prec_total = []
+ rec_total = []
+ l1_dist_total = []
+ infer_all_time = []
+
+ output_dir = args.save_folder
+ os.makedirs(output_dir, exist_ok=True)
+ eval_file = os.path.join(output_dir,
+ 'evaluate_pose_test.txt' if args.pose_test else
+ 'evaluate_modelres_test.txt' if args.modelres_test else
+ 'evaluate_xl_test.txt' if args.xl_test else
+ 'evaluate_default.txt')
+ if args.modelres_test:
+ args.filter_thre = 0.35
+
+ with open(eval_file, 'w') as f:
+ def log_print(*args, **kwargs):
+ print(*args, **kwargs)
+ print(*args, **kwargs, file=f)
+
+ for data_iter_step, (sample_points, pc_w_norm, skeleton, valid_joints_mask, dist_graph, vertices, file_name, edges, gt_skin) in enumerate(data_loader):
+
+ sample_points = sample_points.to(device, non_blocking=True)
+ pc_w_norm = pc_w_norm.to(device, non_blocking=True)
+ skeleton = skeleton.to(device, non_blocking=True)
+ valid_joints_mask = valid_joints_mask.to(device, non_blocking=True)
+ dist_graph = dist_graph.to(device, non_blocking=True)
+ edges = edges.to(device, non_blocking=True)
+
+ start_time = time.time()
+ with torch.cuda.amp.autocast(enabled=False):
+ generate_skin = model(
+ sample_points,
+ skeleton,
+ pc_w_norm,
+ dist_graph,
+ valid_joints_mask
+ )
+ infer_time_pre_mesh = time.time() - start_time
+ infer_all_time.append(infer_time_pre_mesh)
+
+ generate_skin_np = generate_skin.cpu().numpy() # (batch_size, ...)
+ gt_skin_np = gt_skin.cpu().numpy()
+
+ valid_joints_mask_np = valid_joints_mask.cpu().numpy() # (batch_size, num_joints)
+
+ batch_size = generate_skin_np.shape[0]
+ for i in range(batch_size):
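+                # The network predicts weights on the sampled query points; map them back to the
+                # original mesh vertices via a nearest-neighbour lookup in the sampled point cloud.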
+ tree = cKDTree(sample_points[i][:,:3].cpu().numpy())
+ _, indices = tree.query(vertices[i].cpu().numpy())
+ current_generate_skin = generate_skin_np[i][indices] # (n_vertex, n_joints)
+ current_gt_skin = gt_skin_np[i] # (n_vertex, num_joints)
+ current_valid_joints_mask = valid_joints_mask_np[i] # (num_joints,)
+
+ valid_joint_indices = np.where(current_valid_joints_mask == 1)[0]
+
+ if len(valid_joint_indices) == 0:
+ continue
+
+ generate_skin_masked = current_generate_skin[:, valid_joint_indices]
+ gt_skin_masked = current_gt_skin[:, valid_joint_indices]
+
+ if generate_skin_masked.size == 0:
+ continue
+
+ generate_skin_masked[generate_skin_masked < 1e-3] = 0.0
+
+ if args.post_filter:
+ generate_skin_masked = post_filter(generate_skin_masked, edges[i].cpu().numpy(), num_ring=1)
+
+ generate_skin_masked[generate_skin_masked < np.max(generate_skin_masked, axis=1, keepdims=True) * args.filter_thre] = 0.0
+ generate_skin_masked = generate_skin_masked / (generate_skin_masked.sum(axis=1, keepdims=True)+1e-10)
+
+ valid_rows = np.abs(np.sum(gt_skin_masked, axis=1) - 1) < 1e-2
+ generate_skin_masked = generate_skin_masked[valid_rows]
+ gt_skin_masked = gt_skin_masked[valid_rows]
+
+ if args.save_skin_npy:
+ test_folder = ('xl_test' if args.xl_test else
+ 'pose_test' if args.pose_test else
+ 'modelres_test' if args.modelres_test else 'default')
+ os.makedirs(os.path.join(output_dir, test_folder), exist_ok=True)
+ npy_path = os.path.join(output_dir, test_folder, f"{file_name[i]}_skin.npy")
+ np.save(npy_path, generate_skin_masked)
+
+ # metrics
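+                #   precision: fraction of predicted non-zero vertex-joint influences that are
+                #              also non-zero in the ground truth
+                #   recall:    fraction of ground-truth non-zero influences recovered by the prediction
+                #   mean_l1_dist: summed |pred - gt| divided by the number of vertices, i.e. the
+                #              average per-vertex L1 distance between weight vectors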
+ precision = np.sum(np.logical_and(generate_skin_masked > 0, gt_skin_masked > 0)) / (np.sum(generate_skin_masked > 0) + 1e-10)
+ recall = np.sum(np.logical_and(generate_skin_masked > 0, gt_skin_masked > 0)) / (np.sum(gt_skin_masked > 0) + 1e-10)
+                mean_l1_dist = np.sum(np.abs(generate_skin_masked - gt_skin_masked)) / len(generate_skin_masked)
+
+ log_print('for', data_iter_step, ',', file_name[i], ': precision:', precision, 'recall:', recall, 'mean_l1_dist:', mean_l1_dist)
+
+ prec_total.append(precision)
+ rec_total.append(recall)
+ l1_dist_total.append(mean_l1_dist)
+
+ print("number of items: " + str(len(l1_dist_total)))
+ final_precision = np.mean(prec_total) if prec_total else 0.0
+ final_recall = np.mean(rec_total) if rec_total else 0.0
+ final_avg_l1_dist = np.mean(l1_dist_total) if l1_dist_total else 0.0
+ avg_infer_time = np.mean(infer_all_time)
+
+ log_print('final_precision: ', final_precision,
+ 'final_recall: ', final_recall,
+ 'final_avg_l1_dist: ', final_avg_l1_dist,
+ 'avg_infer_time: ', avg_infer_time)
+
+@torch.no_grad()
+def generate(data_loader, model, device, args):
+ model.eval()
+
+ for data_iter_step, (sample_points, pc_w_norm, skeleton, valid_joints_mask, dist_graph, vertices, file_name, edges) in enumerate(data_loader):
+
+ sample_points = sample_points.to(device, non_blocking=True)
+ pc_w_norm = pc_w_norm.to(device, non_blocking=True)
+ skeleton = skeleton.to(device, non_blocking=True)
+ valid_joints_mask = valid_joints_mask.to(device, non_blocking=True)
+ dist_graph = dist_graph.to(device, non_blocking=True)
+ edges = edges.to(device, non_blocking=True)
+
+ if skeleton[0].shape[0] > args.max_joints:
+ continue
+
+ with torch.cuda.amp.autocast(enabled=False):
+ generate_skin = model(
+ sample_points,
+ skeleton,
+ pc_w_norm,
+ dist_graph,
+ valid_mask=valid_joints_mask,
+ )
+
+ generate_skin_np = generate_skin.cpu().numpy() # (batch_size, ...)
+
+ valid_joints_mask_np = valid_joints_mask.cpu().numpy() # (batch_size, num_joints)
+
+ batch_size = generate_skin_np.shape[0]
+ for i in range(batch_size):
+
+ tree = cKDTree(sample_points[i][:,:3].cpu().numpy())
+ _, indices = tree.query(vertices[i].cpu().numpy())
+ current_generate_skin = generate_skin_np[i][indices] # (n_vertex, n_joints)
+
+ current_valid_joints_mask = valid_joints_mask_np[i] # (num_joints,)
+
+ valid_joint_indices = np.where(current_valid_joints_mask == 1)[0]
+
+ if len(valid_joint_indices) == 0:
+ continue
+
+ generate_skin_masked = current_generate_skin[:, valid_joint_indices]
+
+ if generate_skin_masked.size == 0:
+ continue
+
+ if args.post_filter:
+ generate_skin_masked = post_filter(generate_skin_masked, edges[i].cpu().numpy(), num_ring=1)
+
+ generate_skin_masked[generate_skin_masked < np.max(generate_skin_masked, axis=1, keepdims=True) * 0.35] = 0.0
+            generate_skin_masked = generate_skin_masked / (generate_skin_masked.sum(axis=1, keepdims=True) + 1e-10)
+
+ output_dir = args.save_folder
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+ pred_rig_path = os.path.join(args.input_skel_folder, f'{file_name[i]}.txt')
+ print(file_name[i])
+
+ # save rig files with skinning weights
+ os.makedirs(os.path.join(output_dir, 'generate'), exist_ok=True)
+ output_path = os.path.join(output_dir, f'generate/{file_name[i]}_skin.txt')
+ print(output_path)
+
+ save_skin_weights_to_rig(pred_rig_path, generate_skin_masked, output_path)
+ np.save(os.path.join(output_dir, f"generate/{file_name[i]}_skin.npy"), generate_skin_masked)
+
+def main(args):
+ misc.init_distributed_mode(args)
+
+ print('job dir: {}'.format(os.path.dirname(os.path.realpath(__file__))))
+
+ device = torch.device(args.device)
+
+ # fix the seed for reproducibility
+ seed = args.seed + misc.get_rank()
+ torch.manual_seed(seed)
+ np.random.seed(seed)
+
+ cudnn.benchmark = True
+
+ if args.eval:
+ dataset_val = SkinData(args, mode='eval', query_num=8192)
+ elif args.generate:
+ dataset_val = SkinData(args, mode='generate', query_num=8192)
+ else:
+ dataset_train = SkinData(args, mode='train', query_num=8192)
+
+ num_tasks = misc.get_world_size()
+ global_rank = misc.get_rank()
+ if not args.eval and not args.generate:
+ sampler_train = torch.utils.data.DistributedSampler(
+ dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True
+ )
+ print("Sampler_train = %s" % str(sampler_train))
+ else:
+ sampler_val = torch.utils.data.SequentialSampler(dataset_val)
+
+ if args.eval or args.generate:
+ data_loader_val = torch.utils.data.DataLoader(
+ dataset_val, sampler=sampler_val,
+ batch_size=args.batch_size,
+ num_workers=args.num_workers,
+ pin_memory=args.pin_mem,
+ drop_last=False
+ )
+ else:
+ data_loader_train = torch.utils.data.DataLoader(
+ dataset_train, sampler=sampler_train,
+ batch_size=args.batch_size,
+ num_workers=args.num_workers,
+ pin_memory=args.pin_mem,
+ drop_last=True,
+ persistent_workers=True,
+ prefetch_factor=4
+ )
+
+ model = models.__dict__[args.model](args)
+
+ model.to(device)
+
+ n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
+
+ # print("Model = %s" % str(model))
+ print('number of params (M): %.2f' % (n_parameters / 1.e6))
+
+    # only wrap in DDP when a distributed run was actually initialised
+    if args.distributed:
+        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=False)
+        model_without_ddp = model.module
+    else:
+        model_without_ddp = model
+
+ optimizer = torch.optim.AdamW(model_without_ddp.parameters(), lr=args.lr)
+
+ if args.pretrained_weights is not None:
+ pkg = torch.load(args.pretrained_weights, map_location=torch.device("cpu"))
+ model_without_ddp.load_state_dict(pkg["model"])
+
+ if args.generate:
+ generate(data_loader_val, model_without_ddp, device, args)
+ elif args.eval:
+ if not any([args.xl_test, args.pose_test, args.modelres_test]):
+ raise ValueError("Please specify a test type: --xl_test, --pose_test, or --modelres_test")
+
+ test_type = ('Articulation-XL2.0 Test' if args.xl_test else
+ 'Diverse-Pose Test' if args.pose_test else
+ 'ModelsResource Test')
+ print(f"Running evaluation: {test_type}")
+ evaluate(data_loader_val, model_without_ddp, device, args)
+
+if __name__ == '__main__':
+ args = get_args_parser()
+ args = args.parse_args()
+ main(args)
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/skinning_models/models.py b/third_party/Puppeteer/skinning/skinning_models/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..48dd0057ceb1ca338aa096f5eccb3d0ba82cfcfa
--- /dev/null
+++ b/third_party/Puppeteer/skinning/skinning_models/models.py
@@ -0,0 +1,167 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from torch import nn, einsum
+import torch.nn.functional as F
+
+from skinning_models.networks import PreNorm, FeedForward, Attention, PointEmbed
+from third_partys.Michelangelo.encode import load_model
+from third_partys.PartField.encode import partfield
+
+class SkinningBlock(nn.Module):
+ def __init__(self, dim, args, heads=8, dim_head=64, ff_mult=4):
+ super().__init__()
+ # -- Self-Attn (joint)
+ self.self_attn_j = PreNorm(dim, Attention(dim, dim, heads=heads, dim_head=dim_head), context_dim = dim)
+ self.ff_j = PreNorm(dim, FeedForward(dim, mult=ff_mult))
+
+ # -- Cross-Attn (point -> shape)
+ self.cross_attn_ps = PreNorm(dim, Attention(dim, dim, heads=heads, dim_head=dim_head), context_dim = dim)
+ self.ff_ps = PreNorm(dim, FeedForward(dim, mult=ff_mult))
+
+ # -- Cross-Attn (joint -> shape)
+ self.cross_attn_js = PreNorm(dim, Attention(dim, dim, heads=heads, dim_head=dim_head), context_dim = dim)
+ self.ff_js = PreNorm(dim, FeedForward(dim, mult=ff_mult))
+
+ # -- Cross-Attn (joint -> point)
+ self.cross_attn_jp = PreNorm(dim, Attention(dim, dim, heads=heads, dim_head=dim_head), context_dim = dim)
+ self.ff_jp = PreNorm(dim, FeedForward(dim, mult=ff_mult))
+
+ # -- Cross-Attn (point -> joint)
+ self.cross_attn_pj = PreNorm(dim, Attention(dim, dim, heads=heads, dim_head=dim_head), context_dim = dim)
+ self.ff_pj = PreNorm(dim, FeedForward(dim, mult=ff_mult))
+
+ self.use_TAJA = args.use_TAJA
+ if self.use_TAJA:
+ self.rel_pos_embedding = nn.Embedding(10, dim // 4)
+ self.rel_pos_proj = nn.Linear(dim // 4, heads)
+ self.rel_pos_scale = nn.Parameter(torch.ones(1) * 0.1) # Initial value 0.1
+
+ def forward(self, point, joint, shape, valid_mask=None, graph_dist=None):
+ """
+ point: (B, Np, dim)
+ joint: (B, Nj, dim)
+ shape: (B, Ns, dim)
+ valid_mask: (B, Nj) or None
+ return:
+ updated_point, updated_joint
+ """
+
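+        # TAJA adds a relative-position bias derived from the skeleton topology: pairwise joint
+        # graph distances are clamped to [0, 9], looked up in a learned embedding table and
+        # projected to one scalar per attention head; the (learnably scaled) bias is added to the
+        # joint self-attention logits so skeletally close joints can attend to each other more strongly.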
+ # 1) joint self-attention with TAJA
+ if self.use_TAJA:
+ batch_size, n_joints = joint.shape[0], joint.shape[1]
+
+ dist_mask = valid_mask.unsqueeze(1) & valid_mask.unsqueeze(2)
+ safe_dist = torch.where(dist_mask, graph_dist, torch.zeros_like(graph_dist))
+ distances_clamped = torch.clamp(safe_dist, 0, 9).long() # (B, Nj, Nj)
+
+ rel_pos_embeddings = self.rel_pos_embedding(distances_clamped) # (B, Nj, Nj, dim//4)
+ rel_pos_encoding = self.rel_pos_proj(rel_pos_embeddings) * self.rel_pos_scale # (B, Nj, Nj, heads)
+
+ rel_pos_encoding = torch.where(
+ dist_mask.unsqueeze(-1).expand_as(rel_pos_encoding),
+ rel_pos_encoding,
+ torch.zeros_like(rel_pos_encoding)
+ )
+ else:
+ rel_pos_encoding = None
+
+ joint_enhance = self.self_attn_j(joint, context=joint, context_mask=valid_mask, rel_pos=rel_pos_encoding) + joint
+ joint_enhance = self.ff_j(joint_enhance) + joint_enhance
+
+ # 2) point->shape
+ point_context = self.cross_attn_ps(point, context=shape) + point
+ point_context = self.ff_ps(point_context) + point_context
+
+        # 3) joint->shape
+ joint_context = self.cross_attn_js(joint_enhance, context=shape, query_mask=valid_mask) + joint_enhance
+ joint_context = self.ff_js(joint_context) + joint_context
+
+        # 4) joint->point
+ joint_refine = self.cross_attn_jp(joint_context, context=point_context, query_mask=valid_mask) + joint_context
+ joint_refine = self.ff_jp(joint_refine) + joint_refine
+
+        # 5) point->joint
+ point_final = self.cross_attn_pj(point_context, context=joint_refine, context_mask=valid_mask) + point_context
+ point_final = self.ff_pj(point_final) + point_final
+
+ return point_final, joint_refine
+
+class SkinningNetStacked(nn.Module):
+ def __init__(self, args, dim=768, heads=8, dim_head=64, ff_mult=4, scale_init=1.):
+ super().__init__()
+ self.args = args
+ self.max_joints = args.max_joints
+
+ self.skeleton_condition = PointEmbed(dim=dim)
+ self.scale = nn.Parameter(torch.tensor(scale_init), requires_grad=True)
+
+ self.point_encoder = load_model()
+ self.point_encoder.eval()
+ for param in self.point_encoder.parameters():
+ param.requires_grad = False
+
+ self.point_embed_pe = PointEmbed(dim=dim)
+ self.point_embed = partfield()
+ self.proj = nn.Sequential(
+ nn.Linear(448, dim),
+ nn.SiLU(),
+ nn.Linear(dim, dim),
+ )
+
+ # multiple blocks
+ self.blocks = nn.ModuleList([
+ SkinningBlock(dim, args, heads=heads, dim_head=dim_head, ff_mult=ff_mult)
+ for _ in range(args.depth)
+ ])
+
+ def process_point_feature(self, point_feature):
+ shape_latents = self.point_encoder.to_shape_latents(point_feature[:, 1:])
+ point_feature_first_column = point_feature[:, 0:1]
+ encode_feature = torch.cat([point_feature_first_column, shape_latents], dim=1)
+ return encode_feature
+
+ def forward(self,
+ sample_points, skeleton, pc_w_norm, dist_graph,
+ valid_mask=None, # (B, Nj)
+ target=None): # (B, Np, Nj)
+ """
+        Cross-attend point, joint and shape tokens, then score each point against each joint
+        by cosine similarity; a softmax over the joint axis yields per-point skinning weights.
+ """
+
+ point_out1 = self.point_embed(sample_points)
+ point_out1 = self.proj(point_out1)
+ point_out2 = self.point_embed_pe(sample_points)
+ point_out = point_out1 + point_out2 # (bs, 8192, 768)
+
+ joint_out = self.skeleton_condition(skeleton) # (bs, args.max_joints, 768)
+
+ point_feature = self.point_encoder.encode_latents(pc_w_norm)
+ shape_feature = self.process_point_feature(point_feature=point_feature) # (bs, 257, 768)
+
+ for block in self.blocks:
+ point_out, joint_out = block(point_out, joint_out, shape_feature, valid_mask=valid_mask, graph_dist=dist_graph)
+
+ point_norm = F.normalize(point_out, p=2, dim=-1) # (B, Np, D)
+ joint_norm = F.normalize(joint_out, p=2, dim=-1) # (B, Nj, D)
+
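+        # Score every point against every joint by cosine similarity of their embeddings,
+        # scaled by a learnable temperature; the softmax over the joint axis turns the scores
+        # into per-point skinning weights that sum to 1.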
+ score_cos = einsum('b i d, b j d -> b i j', point_norm, joint_norm)
+ score = (self.scale.abs()+1e-9) * score_cos
+
+ skinning_weight = F.softmax(score, dim=-1)
+
+ if target is None:
+ return skinning_weight
+
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/skinning_models/networks.py b/third_party/Puppeteer/skinning/skinning_models/networks.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f3e58678ae99b0d7ec8e2951993466f6fbeec9b
--- /dev/null
+++ b/third_party/Puppeteer/skinning/skinning_models/networks.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2023 Biao Zhang
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: MIT
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under MIT, with the full license text
+# available at https://github.com/1zb/3DShape2VecSet/blob/master/LICENSE.
+#
+# This modified file is released under the same license.
+
+import numpy as np
+import torch
+from torch import nn, einsum
+import torch.nn.functional as F
+
+from einops import rearrange, repeat
+from timm.models.layers import DropPath
+
+def exists(val):
+ return val is not None
+
+def default(val, d):
+ return val if exists(val) else d
+
+class PointEmbed(nn.Module):
+ def __init__(self, hidden_dim=48, dim=128):
+ super().__init__()
+
+ assert hidden_dim % 12 == 0
+
+ self.embedding_dim = hidden_dim
+ chunk_size = self.embedding_dim // 12
+ freq = torch.pow(2, torch.arange(chunk_size)).float() * np.pi
+
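+        # Build a block-diagonal Fourier basis: each of the 6 input channels gets its own bank
+        # of `chunk_size` frequencies, so channels are embedded independently before the sin/cos
+        # features are concatenated with the raw input and projected by the linear layer.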
+ e = torch.zeros(6, chunk_size * 6)
+ for i in range(6):
+ start_idx = i * chunk_size
+ end_idx = start_idx + chunk_size
+ e[i, start_idx:end_idx] = freq
+
+ self.register_buffer('basis', e)
+ self.mlp = nn.Linear(self.embedding_dim + 6, dim)
+
+ @staticmethod
+ def embed(input, basis):
+ projections = torch.einsum('bnd,de->bne', input, basis)
+ embeddings = torch.cat([projections.sin(), projections.cos()], dim=2)
+ return embeddings
+
+ def forward(self, input):
+ # input: B x N x 6
+ x = self.embed(input, self.basis) # B,N,48
+ embed = self.mlp(torch.cat([x, input], dim=2)) # B x N x C
+ return embed
+
+class PreNorm(nn.Module):
+ def __init__(self, dim, fn, context_dim = None, modulated=False):
+ super().__init__()
+ self.fn = fn
+ self.norm = nn.LayerNorm(dim)
+ self.norm_context = nn.LayerNorm(context_dim) if exists(context_dim) else None
+
+ self.modulated = modulated
+ if self.modulated:
+ self.gamma = nn.Linear(dim, dim, bias=False)
+ self.beta = nn.Linear(dim, dim, bias=False)
+
+ def forward(self, x, **kwargs):
+ x = self.norm(x)
+
+ if self.modulated:
+ label = kwargs.pop('label')
+ gamma = self.gamma(label) # b 1 c
+ beta = self.beta(label) # b 1 c
+ x = gamma * x + beta
+
+ if exists(self.norm_context):
+ context = kwargs['context']
+ normed_context = self.norm_context(context)
+ kwargs.update(context = normed_context)
+
+ return self.fn(x, **kwargs)
+
+class GEGLU(nn.Module):
+ def forward(self, x):
+ x, gates = x.chunk(2, dim = -1)
+ return x * F.gelu(gates)
+
+class FeedForward(nn.Module):
+ def __init__(self, dim, mult = 4, drop_path_rate = 0.0):
+ super().__init__()
+ self.net = nn.Sequential(
+ nn.Linear(dim, dim * mult * 2),
+ GEGLU(),
+ nn.Linear(dim * mult, dim)
+ )
+
+ self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
+
+ def forward(self, x):
+ return self.drop_path(self.net(x))
+
+class Attention(nn.Module):
+ def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64, drop_path_rate = 0.0):
+ super().__init__()
+ inner_dim = dim_head * heads
+ context_dim = default(context_dim, query_dim)
+ self.scale = dim_head ** -0.5
+ self.heads = heads
+
+ self.to_q = nn.Linear(query_dim, inner_dim, bias = False)
+ self.to_kv = nn.Linear(context_dim, inner_dim * 2, bias = False)
+ self.to_out = nn.Linear(inner_dim, query_dim)
+
+ self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
+
+ def forward(self, x, context = None, query_mask = None, context_mask=None, rel_pos=None):
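+        # query_mask masks out attention rows for padded query tokens (e.g. padded joints),
+        # context_mask masks out padded key/value positions; both are expanded across the
+        # flattened (batch * heads) dimension before the logits are filled with -inf.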
+ h = self.heads
+
+ q = self.to_q(x)
+ context = default(context, x)
+ k, v = self.to_kv(context).chunk(2, dim = -1)
+ q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))
+ sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
+
+ if exists(rel_pos):
+ # rel_pos shape expected to be [b, i, j, h] or [b h, i, j]
+ if rel_pos.dim() == 4: # [b, i, j, h]
+ # Reshape to match attention heads dimension
+ rel_pos = rearrange(rel_pos, 'b i j h -> (b h) i j')
+
+ # Add the relative positional bias to the attention scores
+ sim = sim + rel_pos
+
+ if exists(query_mask): # shape (B, Nq)
+ query_mask = query_mask.bool()
+ if query_mask.dim() == 2:
+ query_mask = repeat(query_mask, 'b i -> (b h) i 1', h=h)
+ elif query_mask.dim() == 3:
+ query_mask = repeat(query_mask, 'b n j -> (b h) n j', h=h)
+ sim.masked_fill_(~query_mask, -torch.finfo(sim.dtype).max)
+
+
+ if exists(context_mask):
+ context_mask_bool = context_mask.bool()
+ if context_mask_bool.dim() == 2:
+ context_mask_bool = repeat(context_mask_bool, 'b j -> (b h) 1 j', h=h)
+ elif context_mask_bool.dim() == 3:
+ context_mask_bool = repeat(context_mask_bool, 'b n j -> (b h) n j', h=h)
+ sim.masked_fill_(~context_mask_bool, -torch.finfo(sim.dtype).max)
+
+ attn = sim.softmax(dim = -1)
+
+ out = einsum('b i j, b j d -> b i d', attn, v)
+ out = rearrange(out, '(b h) n d -> b n (h d)', h = h)
+ return self.drop_path(self.to_out(out))
diff --git a/third_party/Puppeteer/skinning/utils/misc.py b/third_party/Puppeteer/skinning/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..de4190715fc64cba90440683034f4bc35f3672b8
--- /dev/null
+++ b/third_party/Puppeteer/skinning/utils/misc.py
@@ -0,0 +1,373 @@
+# Copyright (c) 2023 Biao Zhang
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: MIT
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under MIT, with the full license text
+# available at https://github.com/1zb/3DShape2VecSet/blob/master/LICENSE.
+#
+# This modified file is released under the same license.
+
+import builtins
+import datetime
+import os
+import time
+from collections import defaultdict, deque
+from pathlib import Path
+
+import torch
+import torch.distributed as dist
+import numpy as np
+from typing import List
+
+if torch.__version__[0] == '2':
+ from torch import inf
+else:
+ from torch._six import inf
+
+class SmoothedValue(object):
+ """Track a series of values and provide access to smoothed values over a
+ window or the global series average.
+ """
+
+ def __init__(self, window_size=20, fmt=None):
+ if fmt is None:
+ fmt = "{median:.4f} ({global_avg:.4f})"
+ self.deque = deque(maxlen=window_size)
+ self.total = 0.0
+ self.count = 0
+ self.fmt = fmt
+
+ def update(self, value, n=1):
+ self.deque.append(value)
+ self.count += n
+ self.total += value * n
+
+ def synchronize_between_processes(self):
+ """
+ Warning: does not synchronize the deque!
+ """
+ if not is_distributed():
+ return
+ t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
+ barrier()
+ all_reduce_sum(t)
+ t = t.tolist()
+ self.count = int(t[0])
+ self.total = t[1]
+
+ @property
+ def median(self):
+ d = torch.tensor(list(self.deque))
+ return d.median().item()
+
+ @property
+ def avg(self):
+ d = torch.tensor(list(self.deque), dtype=torch.float32)
+ return d.mean().item()
+
+ @property
+ def global_avg(self):
+ return self.total / self.count
+
+ @property
+ def max(self):
+ return max(self.deque)
+
+ @property
+ def value(self):
+ return self.deque[-1]
+
+ def __str__(self):
+ return self.fmt.format(
+ median=self.median,
+ avg=self.avg,
+ global_avg=self.global_avg,
+ max=self.max,
+ value=self.value,
+ )
+
+
+class MetricLogger(object):
+ def __init__(self, delimiter="\t"):
+ self.meters = defaultdict(SmoothedValue)
+ self.delimiter = delimiter
+
+ def update(self, **kwargs):
+ for k, v in kwargs.items():
+ if v is None:
+ continue
+ if isinstance(v, torch.Tensor):
+ v = v.item()
+ assert isinstance(v, (float, int))
+ self.meters[k].update(v)
+
+ def __getattr__(self, attr):
+ if attr in self.meters:
+ return self.meters[attr]
+ if attr in self.__dict__:
+ return self.__dict__[attr]
+ raise AttributeError("'{}' object has no attribute '{}'".format(
+ type(self).__name__, attr))
+
+ def __str__(self):
+ loss_str = []
+ for name, meter in self.meters.items():
+ loss_str.append(
+ "{}: {}".format(name, str(meter))
+ )
+ return self.delimiter.join(loss_str)
+
+ def synchronize_between_processes(self):
+ for meter in self.meters.values():
+ meter.synchronize_between_processes()
+
+ def add_meter(self, name, meter):
+ self.meters[name] = meter
+
+ def log_every(self, iterable, print_freq, header=None):
+ i = 0
+ if not header:
+ header = ''
+ start_time = time.time()
+ end = time.time()
+ iter_time = SmoothedValue(fmt='{avg:.4f}')
+ data_time = SmoothedValue(fmt='{avg:.4f}')
+ space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
+ log_msg = [
+ header,
+ '[{0' + space_fmt + '}/{1}]',
+ 'eta: {eta}',
+ '{meters}',
+ 'time: {time}',
+ 'data: {data}'
+ ]
+ if torch.cuda.is_available():
+ log_msg.append('max mem: {memory:.0f}')
+ log_msg = self.delimiter.join(log_msg)
+ MB = 1024.0 * 1024.0
+ for obj in iterable:
+ data_time.update(time.time() - end)
+ yield obj
+ iter_time.update(time.time() - end)
+ if i % print_freq == 0 or i == len(iterable) - 1:
+ eta_seconds = iter_time.global_avg * (len(iterable) - i)
+ eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
+ if torch.cuda.is_available():
+ print(log_msg.format(
+ i, len(iterable), eta=eta_string,
+ meters=str(self),
+ time=str(iter_time), data=str(data_time),
+ memory=torch.cuda.max_memory_allocated() / MB))
+ else:
+ print(log_msg.format(
+ i, len(iterable), eta=eta_string,
+ meters=str(self),
+ time=str(iter_time), data=str(data_time)))
+ i += 1
+ end = time.time()
+ total_time = time.time() - start_time
+ total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+ print('{} Total time: {} ({:.4f} s / it)'.format(
+ header, total_time_str, total_time / len(iterable)))
+
+
+def setup_for_distributed(is_master):
+ """
+ This function disables printing when not in master process
+ """
+ builtin_print = builtins.print
+
+ def print(*args, **kwargs):
+ force = kwargs.pop('force', False)
+ force = force or (get_world_size() > 8)
+ if is_master:# or force:
+ now = datetime.datetime.now().time()
+ builtin_print('[{}] '.format(now), end='') # print with time stamp
+ builtin_print(*args, **kwargs)
+
+ builtins.print = print
+
+
+def is_dist_avail_and_initialized():
+ if not dist.is_available():
+ return False
+ if not dist.is_initialized():
+ return False
+ return True
+
+
+def get_world_size():
+ if not is_dist_avail_and_initialized():
+ return 1
+ return dist.get_world_size()
+
+
+def get_rank():
+ if not is_dist_avail_and_initialized():
+ return 0
+ return dist.get_rank()
+
+
+def is_main_process():
+ return get_rank() == 0
+
+
+def save_on_master(*args, **kwargs):
+ if is_main_process():
+ torch.save(*args, **kwargs)
+
+
+def init_distributed_mode(args):
+ if args.dist_on_itp:
+ args.rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
+ args.world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])
+ args.gpu = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
+ args.dist_url = "tcp://%s:%s" % (os.environ['MASTER_ADDR'], os.environ['MASTER_PORT'])
+ os.environ['LOCAL_RANK'] = str(args.gpu)
+ os.environ['RANK'] = str(args.rank)
+ os.environ['WORLD_SIZE'] = str(args.world_size)
+ # ["RANK", "WORLD_SIZE", "MASTER_ADDR", "MASTER_PORT", "LOCAL_RANK"]
+ elif 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
+ args.rank = int(os.environ["RANK"])
+ args.world_size = int(os.environ['WORLD_SIZE'])
+ args.gpu = int(os.environ['LOCAL_RANK'])
+ elif 'SLURM_PROCID' in os.environ:
+ args.rank = int(os.environ['SLURM_PROCID'])
+ args.gpu = args.rank % torch.cuda.device_count()
+ else:
+ print('Not using distributed mode')
+ setup_for_distributed(is_master=True) # hack
+ args.distributed = False
+ return
+
+ args.distributed = True
+
+ torch.cuda.set_device(args.gpu)
+ args.dist_backend = 'nccl'
+ print('| distributed init (rank {}): {}, gpu {}'.format(
+ args.rank, args.dist_url, args.gpu), flush=True)
+ torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
+ world_size=args.world_size, rank=args.rank)
+ torch.distributed.barrier()
+ setup_for_distributed(args.rank == 0)
+
+
+class NativeScalerWithGradNormCount:
+ state_dict_key = "amp_scaler"
+
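+    # Thin wrapper around torch.cuda.amp.GradScaler: scales the loss for backward, optionally
+    # clips gradients, and returns the gradient norm so callers can log it.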
+ def __init__(self):
+ self._scaler = torch.cuda.amp.GradScaler()
+
+ def __call__(self, loss, optimizer, clip_grad=None, parameters=None, create_graph=False, update_grad=True):
+ self._scaler.scale(loss).backward(create_graph=create_graph)
+ if update_grad:
+ if clip_grad is not None:
+ assert parameters is not None
+ self._scaler.unscale_(optimizer) # unscale the gradients of optimizer's assigned params in-place
+ norm = torch.nn.utils.clip_grad_norm_(parameters, clip_grad)
+ else:
+ self._scaler.unscale_(optimizer)
+ norm = get_grad_norm_(parameters)
+ self._scaler.step(optimizer)
+ self._scaler.update()
+ else:
+ norm = None
+ return norm
+
+ def state_dict(self):
+ return self._scaler.state_dict()
+
+ def load_state_dict(self, state_dict):
+ self._scaler.load_state_dict(state_dict)
+
+
+def get_grad_norm_(parameters, norm_type: float = 2.0) -> torch.Tensor:
+ if isinstance(parameters, torch.Tensor):
+ parameters = [parameters]
+ parameters = [p for p in parameters if p.grad is not None]
+ norm_type = float(norm_type)
+ if len(parameters) == 0:
+ return torch.tensor(0.)
+ device = parameters[0].grad.device
+ if norm_type == inf:
+ total_norm = max(p.grad.detach().abs().max().to(device) for p in parameters)
+ else:
+ total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), norm_type)
+ return total_norm
+
+
+def save_model(args, epoch, model, model_without_ddp, optimizer, loss_scaler):
+ output_dir = Path(args.output_dir)
+ epoch_name = str(epoch)
+ if loss_scaler is not None:
+ checkpoint_paths = [output_dir / ('checkpoint-%s.pth' % epoch_name)]
+ for checkpoint_path in checkpoint_paths:
+ to_save = {
+ 'model': model_without_ddp.state_dict(),
+ 'optimizer': optimizer.state_dict(),
+ 'epoch': epoch,
+ 'scaler': loss_scaler.state_dict(),
+ 'args': args,
+ }
+
+ save_on_master(to_save, checkpoint_path)
+ print("save")
+ else:
+ client_state = {'epoch': epoch}
+ print("save fail")
+ model.save_checkpoint(save_dir=args.output_dir, tag="checkpoint-%s" % epoch_name, client_state=client_state)
+
+
+def load_model(args, model_without_ddp, optimizer, loss_scaler):
+ if args.resume:
+ if args.resume.startswith('https'):
+ checkpoint = torch.hub.load_state_dict_from_url(
+ args.resume, map_location='cpu', check_hash=True)
+ else:
+ checkpoint = torch.load(args.resume, map_location='cpu')
+ model_without_ddp.load_state_dict(checkpoint['model'])
+ print("Resume checkpoint %s" % args.resume)
+ if 'optimizer' in checkpoint and 'epoch' in checkpoint and not (hasattr(args, 'eval') and args.eval):
+ optimizer.load_state_dict(checkpoint['optimizer'])
+ args.start_epoch = checkpoint['epoch'] + 1
+ if 'scaler' in checkpoint:
+ loss_scaler.load_state_dict(checkpoint['scaler'])
+ print("With optim & sched!")
+
+
+def all_reduce_mean(x):
+ world_size = get_world_size()
+ if world_size > 1:
+ x_reduce = torch.tensor(x).cuda()
+ dist.all_reduce(x_reduce)
+ x_reduce /= world_size
+ return x_reduce.item()
+ else:
+ return x
+
+
+def is_distributed():
+ if not dist.is_available() or not dist.is_initialized():
+ return False
+ return True
+
+
+def barrier():
+ if not is_distributed():
+ return
+ torch.distributed.barrier()
+
+def all_reduce_sum(tensor):
+ if not is_distributed():
+ return tensor
+ dim_squeeze = False
+ if tensor.ndim == 0:
+ tensor = tensor[None, ...]
+ dim_squeeze = True
+ torch.distributed.all_reduce(tensor)
+ print("loss_tensor: ", tensor)
+ if dim_squeeze:
+ tensor = tensor.squeeze(0)
+ return tensor
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/utils/rig_parser.py b/third_party/Puppeteer/skinning/utils/rig_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..46e78e89008866db9ae8189f9d64caebbc317338
--- /dev/null
+++ b/third_party/Puppeteer/skinning/utils/rig_parser.py
@@ -0,0 +1,153 @@
+# Copyright (c) 2007 Free Software Foundation, Inc.
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: GNU General Public License v3.0
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under GNU General Public License v3.0, with the full license text
+# available at https://github.com/zhan-xu/RigNet/blob/master/LICENSE-GPLv3.txt.
+#
+# This modified file is released under the same license.
+
+
+import numpy as np
+try:
+ import Queue as Q # ver. < 3.0
+except ImportError:
+ import queue as Q
+
+class Node(object):
+ def __init__(self, name, pos):
+ self.name = name
+ self.pos = pos
+
+class TreeNode(Node):
+ def __init__(self, name, pos):
+ super(TreeNode, self).__init__(name, pos)
+ self.children = []
+ self.parent = None
+
+
+class Info:
+ """
+ Wrap class for rig information
+ """
+ def __init__(self, filename=None):
+ self.joint_pos = {}
+ self.joint_skin = []
+ self.root = None
+ if filename is not None:
+ self.load(filename)
+
+ def load(self, filename):
+ with open(filename, 'r') as f_txt:
+ lines = f_txt.readlines()
+
+ for line in lines:
+ word = line.split()
+ if word[0] == 'joints':
+ self.joint_pos[word[1]] = [float(word[2]), float(word[3]), float(word[4])]
+
+ for line in lines:
+ word = line.split()
+ if word[0] == 'root':
+ root_pos = self.joint_pos[word[1]]
+ self.root = TreeNode(word[1], (root_pos[0], root_pos[1], root_pos[2]))
+ elif word[0] == 'skin':
+ skin_item = word[1:]
+ self.joint_skin.append(skin_item)
+ self.loadHierarchy_recur(self.root, lines, self.joint_pos)
+
+ def loadHierarchy_recur(self, node, lines, joint_pos):
+ for li in lines:
+ if li.split()[0] == 'hier' and li.split()[1] == node.name:
+ pos = joint_pos[li.split()[2]]
+ ch_node = TreeNode(li.split()[2], tuple(pos))
+ node.children.append(ch_node)
+ ch_node.parent = node
+ self.loadHierarchy_recur(ch_node, lines, joint_pos)
+
+ def save(self, filename):
+ with open(filename, 'w') as file_info:
+ for key, val in self.joint_pos.items():
+ file_info.write(
+ 'joints {0} {1:.8f} {2:.8f} {3:.8f}\n'.format(key, val[0], val[1], val[2]))
+ file_info.write('root {}\n'.format(self.root.name))
+
+ for skw in self.joint_skin:
+ cur_line = 'skin {0} '.format(skw[0])
+ for cur_j in range(1, len(skw), 2):
+ cur_line += '{0} {1:.2f} '.format(skw[cur_j], float(skw[cur_j+1]))
+ cur_line += '\n'
+ file_info.write(cur_line)
+
+ this_level = self.root.children
+ while this_level:
+ next_level = []
+ for p_node in this_level:
+ file_info.write('hier {0} {1}\n'.format(p_node.parent.name, p_node.name))
+ next_level += p_node.children
+ this_level = next_level
+
+ def save_as_skel_format(self, filename):
+ fout = open(filename, 'w')
+ this_level = [self.root]
+ hier_level = 1
+ while this_level:
+ next_level = []
+ for p_node in this_level:
+ pos = p_node.pos
+ parent = p_node.parent.name if p_node.parent is not None else 'None'
+ line = '{0} {1} {2:8f} {3:8f} {4:8f} {5}\n'.format(hier_level, p_node.name, pos[0], pos[1], pos[2],
+ parent)
+ fout.write(line)
+ for c_node in p_node.children:
+ next_level.append(c_node)
+ this_level = next_level
+ hier_level += 1
+ fout.close()
+
+    def normalize(self, scale, trans):
+        # joint positions are stored as plain lists/tuples, so convert to numpy arrays
+        # before dividing by the scale and subtracting the translation
+        for k, v in self.joint_pos.items():
+            self.joint_pos[k] = np.asarray(v, dtype=np.float64) / scale - trans
+
+        this_level = [self.root]
+        while this_level:
+            next_level = []
+            for node in this_level:
+                pos = np.asarray(node.pos, dtype=np.float64) / scale
+                node.pos = (pos[0] - trans[0], pos[1] - trans[1], pos[2] - trans[2])
+                for ch in node.children:
+                    next_level.append(ch)
+            this_level = next_level
+
+ def get_joint_dict(self):
+ joint_dict = {}
+ this_level = [self.root]
+ while this_level:
+ next_level = []
+ for node in this_level:
+ joint_dict[node.name] = node.pos
+ next_level += node.children
+ this_level = next_level
+ return joint_dict
+
+ def adjacent_matrix(self):
+ joint_pos = self.get_joint_dict()
+ joint_name_list = list(joint_pos.keys())
+ num_joint = len(joint_pos)
+ adj_matrix = np.zeros((num_joint, num_joint))
+ this_level = [self.root]
+ while this_level:
+ next_level = []
+ for p_node in this_level:
+ for c_node in p_node.children:
+ index_parent = joint_name_list.index(p_node.name)
+ index_children = joint_name_list.index(c_node.name)
+ adj_matrix[index_parent, index_children] = 1.
+ next_level += p_node.children
+ this_level = next_level
+ adj_matrix = adj_matrix + adj_matrix.transpose()
+ return adj_matrix
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/utils/save_h5.py b/third_party/Puppeteer/skinning/utils/save_h5.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a2025400f33879d353c1a7416f124b01f5ea9a4
--- /dev/null
+++ b/third_party/Puppeteer/skinning/utils/save_h5.py
@@ -0,0 +1,170 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+import h5py
+from tqdm import tqdm
+import scipy.sparse as sp
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from functools import partial
+from dataclasses import dataclass
+from typing import List, Dict, Any, Optional
+from utils.util import read_obj_file, read_rig_file, normalize_to_unit_cube, build_adjacency_list, compute_graph_distance, get_tpl_edges
+
+@dataclass
+class ProcessedSample:
+ """Data structure for a processed sample."""
+ vertices: np.ndarray
+ faces: np.ndarray
+ joints: np.ndarray
+ bones: np.ndarray
+ root_index: int
+ pc_w_norm: np.ndarray
+ file_name: str
+ skin: np.ndarray
+ graph_dist: np.ndarray
+ edges: np.ndarray
+
+def process_sample(data: Dict[str, Any]) -> Optional[ProcessedSample]:
+ """
+ Process a single sample from the dataset.
+
+ Args:
+ data: Dictionary containing sample data
+
+ Returns:
+ ProcessedSample object or None if processing fails
+ """
+ vertices = data['vertices'].copy()
+ joints = data['joints'].copy()
+ if len(joints) > 70: # filter out data with too many joints
+ return None
+
+ vertices, center, scale = normalize_to_unit_cube(vertices, 0.9995)
+ joints = (joints - center) * scale
+
+ # Build skinning weights matrix
+ skinning_data = data['skinning_weights_value']
+ skinning_rows = data['skinning_weights_row']
+ skinning_cols = data['skinning_weights_col']
+ skinning_shape = data['skinning_weights_shape']
+
+ skinning_sparse = sp.coo_matrix(
+ (skinning_data, (skinning_rows, skinning_cols)),
+ shape=skinning_shape
+ )
+ skinning_weights = skinning_sparse.toarray() # (n_vertex, n_joints)
+
+ # Compute topology and graph features
+ edges = get_tpl_edges(data['vertices'], data['faces'])
+ num_joints = len(data['joints'])
+ adjacency = build_adjacency_list(num_joints, data['bones'])
+ graph_dist = compute_graph_distance(num_joints, adjacency)
+
+ return ProcessedSample(
+ vertices=vertices,
+ faces=data['faces'],
+ joints=joints,
+ bones=data['bones'],
+ root_index=data['root_index'],
+ pc_w_norm=data['pc_w_norm'],
+ file_name=data['uuid'],
+ skin=skinning_weights,
+ graph_dist=graph_dist,
+ edges=edges
+ )
+
+def parallel_process_samples(
+ data_list: List[Dict[str, Any]],
+ max_workers: Optional[int] = None
+) -> List[ProcessedSample]:
+ """
+ Process multiple samples in parallel.
+
+ Args:
+ data_list: List of sample dictionaries
+ max_workers: Maximum number of worker processes
+
+ Returns:
+ List of successfully processed samples
+ """
+ processed_samples = []
+
+ with ProcessPoolExecutor(max_workers=max_workers) as executor:
+ # Submit all tasks
+ futures = {executor.submit(process_sample, data): data for data in data_list}
+
+ # Process results with progress bar
+ for future in tqdm(as_completed(futures), total=len(futures), desc='Processing samples'):
+ try:
+ result = future.result()
+ if result is not None:
+ processed_samples.append(result)
+                else:
+                    # process_sample returns None for samples filtered out (more than 70 joints)
+                    original_data = futures[future]
+                    print(f"Skipped sample {original_data.get('uuid', 'unknown')} (too many joints)")
+            except Exception as e:
+                original_data = futures[future]
+                print(f"Exception in processing {original_data.get('uuid', 'unknown')}: {e}")
+
+ return processed_samples
+
+def save_to_h5(processed_samples: List[ProcessedSample], output_path: str) -> None:
+ """
+ Save processed samples to HDF5 file.
+
+ Args:
+ processed_samples: List of processed samples
+ output_path: Output HDF5 file path
+ """
+ with h5py.File(output_path, 'w') as f:
+ # Add metadata
+ f.attrs['num_samples'] = len(processed_samples)
+ f.attrs['version'] = '1.0'
+
+ for i, sample in enumerate(tqdm(processed_samples, desc='Saving to HDF5')):
+ grp = f.create_group(f'sample_{i}')
+
+ # Save arrays with compression
+ grp.create_dataset('joints', data=sample.joints, compression='gzip')
+ grp.create_dataset('bones', data=sample.bones, compression='gzip')
+ grp.create_dataset('root_index', data=sample.root_index, dtype='i')
+ grp.create_dataset('pc_w_norm', data=sample.pc_w_norm, compression='gzip')
+ grp.create_dataset('vertices', data=sample.vertices, compression='gzip')
+ grp.create_dataset('faces', data=sample.faces, compression='gzip')
+ grp.create_dataset('edges', data=sample.edges, compression='gzip')
+ grp.create_dataset('skin', data=sample.skin, compression='gzip')
+ grp.create_dataset('graph_dist', data=sample.graph_dist, compression='gzip')
+ string_dtype = h5py.special_dtype(vlen=str)
+ grp.create_dataset('file_name', data=sample.file_name, dtype=string_dtype)
+
+
+def main(npz_file_path, h5_file_path, max_workers):
+ loaded_data = np.load(npz_file_path, allow_pickle=True)
+ data_list = loaded_data['arr_0']
+
+ num_samples = len(data_list)
+ print(f"Total samples: {num_samples}")
+
+ processed_samples = parallel_process_samples(
+ data_list=data_list,
+ max_workers=max_workers
+ )
+ save_to_h5(processed_samples, h5_file_path)
+ print("Processing complete!")
+
+if __name__ == '__main__':
+ npz_file_path = 'articulation_xlv2_test.npz'
+ h5_file_path = 'articulation_xlv2_test.h5'
+ main(npz_file_path, h5_file_path, max_workers=8)
diff --git a/third_party/Puppeteer/skinning/utils/skin_data.py b/third_party/Puppeteer/skinning/utils/skin_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..0970e786ebd6068294757a94484a1aaf3a30072f
--- /dev/null
+++ b/third_party/Puppeteer/skinning/utils/skin_data.py
@@ -0,0 +1,250 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import h5py
+import numpy as np
+import torch
+import torch.utils.data as data
+import trimesh
+from collections import deque
+from utils.util import process_mesh_to_pc, read_obj_file, read_rig_file, normalize_to_unit_cube, build_adjacency_list, \
+ compute_graph_distance, get_tpl_edges
+
+class SkinData(data.Dataset):
+ def __init__(self, args, mode, query_num=4096):
+ self.args = args
+ self.query_num = query_num
+ self.mode = mode # train, eval, generate
+
+ if mode == 'eval':
+ self._init_h5_data(args)
+ elif mode == 'generate':
+ self._init_file_data(args)
+ else:
+ raise ValueError(f"Unsupported mode: {mode}")
+
+ def _init_h5_data(self, args):
+ """Initialize for H5 file-based evaluation"""
+ self.data_source = 'h5'
+ self.eval_data_path = args.eval_data_path
+ self.h5_file = None
+
+ with h5py.File(self.eval_data_path, 'r') as f:
+ self.num_samples = len(f.keys())
+ print(f"[SkinData] found {self.num_samples} samples in the dataset.")
+
+ def _init_file_data(self, args):
+ """Initialize for mesh/rig file-based generation"""
+ self.data_source = 'files'
+ self.mesh_folder = args.mesh_folder
+ self.rig_files_dir = args.input_skel_folder
+
+ # Get list of available samples
+ self.sample_files = []
+ for obj_file in os.listdir(self.mesh_folder):
+ if obj_file.endswith('.obj'):
+ file_name = os.path.splitext(obj_file)[0]
+ rig_file_path = os.path.join(self.rig_files_dir, f'{file_name}.txt')
+ if os.path.exists(rig_file_path):
+ self.sample_files.append((obj_file, rig_file_path, file_name))
+
+ self.num_samples = len(self.sample_files)
+ print(f"[SkinData] found {self.num_samples} samples for generation.")
+
+ def _load_h5_data(self, idx):
+ """Load data from H5 file"""
+ if self.h5_file is None:
+ self.h5_file = h5py.File(self.eval_data_path, 'r')
+
+ data = self.h5_file[f'sample_{idx}']
+
+ sample_points = data['pc_w_norm'][:, :3]
+ normal = data['pc_w_norm'][:, 3:]
+ joints = data['joints'][:]
+ bones = data['bones'][:]
+ root_index = data['root_index'][()]
+ graph_dist = data['graph_dist'][:]
+
+ file_name = data['file_name'][()].decode('utf-8')
+ vertices = data['vertices'][:]
+ faces = data['faces'][:]
+ edges = data['edges'][:]
+ gt_skin = data['skin'][:]
+
+ return {
+ 'sample_points': sample_points,
+ 'normal': normal,
+ 'joints': joints,
+ 'bones': bones,
+ 'root_index': root_index,
+ 'graph_dist': graph_dist,
+ 'file_name': file_name,
+ 'vertices': vertices,
+ 'faces': faces,
+ 'edges': edges,
+ 'gt_skin': gt_skin
+ }
+
+ def _load_file_data(self, idx):
+ """Load data from mesh and rig files"""
+ obj_file, rig_file_path, file_name = self.sample_files[idx]
+
+ # Load mesh
+ mesh_file_path = os.path.join(self.mesh_folder, obj_file)
+ vertices, faces = read_obj_file(mesh_file_path)
+
+ # Create trimesh object and process to point cloud
+ mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
+ pc_w_norm, _ = process_mesh_to_pc(mesh, sample_num=8192)
+ sample_points = pc_w_norm[:, :3]
+ normal = pc_w_norm[:, 3:]
+
+ # Load rig data
+ joints, bones, root_index = read_rig_file(rig_file_path)
+
+ # Normalize mesh and joints
+ vertices, center, scale = normalize_to_unit_cube(vertices, 0.9995)
+ joints -= center
+ joints *= scale
+
+ # Get edges
+ edges = get_tpl_edges(vertices, faces)
+
+ # Compute graph distance
+ num_joints = joints.shape[0]
+ adjacency = build_adjacency_list(num_joints, bones)
+ graph_dist = compute_graph_distance(num_joints, adjacency)
+
+ return {
+ 'sample_points': sample_points,
+ 'normal': normal,
+ 'joints': joints,
+ 'bones': bones,
+ 'root_index': root_index,
+ 'graph_dist': graph_dist,
+ 'file_name': file_name,
+ 'vertices': vertices,
+ 'faces': faces,
+ 'edges': edges
+ }
+
+ def _process_data(self, data_dict):
+ """Common processing for both data sources"""
+ sample_points = data_dict['sample_points']
+ normal = data_dict['normal']
+ joints = data_dict['joints']
+ bones = data_dict['bones']
+ root_index = data_dict['root_index']
+ graph_dist = data_dict['graph_dist']
+ file_name = data_dict['file_name']
+ vertices = data_dict['vertices']
+ faces = data_dict['faces']
+ edges = data_dict['edges']
+ if 'gt_skin' in data_dict:
+ gt_skin = data_dict['gt_skin']
+
+ # Random sampling for query points
+ ind = np.random.default_rng().choice(sample_points.shape[0], self.query_num, replace=False)
+ query_points = sample_points[ind]
+ query_normal = normal[ind]
+
+ # Normalize to (-0.5, 0.5)
+ bounds = np.array([sample_points.min(axis=0), sample_points.max(axis=0)])
+ center = (bounds[0] + bounds[1]) / 2
+ scale = (bounds[1] - bounds[0]).max() + 1e-5
+
+ sample_points = (sample_points - center) / scale
+ query_points = (query_points - center) / scale
+ joints = (joints - center) / scale
+ vertices = (vertices - center) / scale
+
+ # Normalize normals
+ pc_coor = sample_points
+ normal_norm = np.linalg.norm(normal, axis=1, keepdims=True)
+ normal = normal / (normal_norm + 1e-8)
+
+ query_points = query_points.clip(-0.5, 0.5)
+ joints = joints.clip(-0.5, 0.5)
+
+ # Process joints to bone coordinates format
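+        # Each joint becomes a 6-D "bone" vector [parent_x, parent_y, parent_z, x, y, z];
+        # the root joint is treated as its own parent.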
+ j = joints.shape[0]
+ bone_coor = np.zeros((j, 6))
+ bone_coor[:, 3:] = joints
+
+ # Create parent indices array
+ parent_indices = np.ones(j, dtype=np.int32) * -1
+
+ # Fill parent information using bones array
+ for parent, child in bones:
+ if parent_indices[child] == -1:
+ parent_indices[child] = parent
+
+ # Set root node parent to itself
+ parent_indices[root_index] = root_index
+
+ # Get parent coordinates
+ valid_mask = parent_indices != -1
+ bone_coor[valid_mask, :3] = joints[parent_indices[valid_mask]]
+
+ # Convert to tensors
+        query_points_normal = torch.from_numpy(np.concatenate([query_points, query_normal], axis=-1)).float()
+        query_points = torch.from_numpy(query_points).float()
+ bone_coor = torch.from_numpy(bone_coor).float()
+ graph_dist = torch.from_numpy(graph_dist).float()
+ edges = torch.from_numpy(edges).long()
+ vertices = torch.from_numpy(vertices).float()
+ if 'gt_skin' in data_dict:
+ gt_skin = torch.from_numpy(gt_skin).float()
+
+ pc_coor = pc_coor / np.abs(pc_coor).max() * 0.9995
+ pc_w_norm = torch.from_numpy(np.concatenate([pc_coor, normal], axis=-1)).float()
+
+ # Handle joint padding
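+        # Joints are padded up to args.max_joints; padded graph distances get the sentinel
+        # value 999, and valid_joints_mask lets the model ignore the padded slots.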
+ max_joints = self.args.max_joints
+ num_joints = bone_coor.shape[0]
+ padding_size = max_joints - num_joints
+
+ if padding_size > 0:
+ bone_coor = torch.nn.functional.pad(bone_coor, (0, 0, 0, padding_size), 'constant', 0)
+ graph_dist = torch.nn.functional.pad(
+ graph_dist,
+ pad=(0, padding_size, 0, padding_size),
+ mode='constant',
+ value=999
+ )
+ if 'gt_skin' in data_dict:
+ gt_skin = torch.nn.functional.pad(gt_skin, (0, padding_size), 'constant', 0)
+
+ # Create valid joints mask
+ valid_joints_mask = torch.zeros(max_joints, dtype=torch.bool)
+ valid_joints_mask[:num_joints] = True
+
+ if 'gt_skin' in data_dict:
+ return query_points_normal, pc_w_norm, bone_coor, valid_joints_mask, graph_dist, vertices, file_name, edges, gt_skin
+ else:
+ return query_points_normal, pc_w_norm, bone_coor, valid_joints_mask, graph_dist, vertices, file_name, edges
+
+ def __getitem__(self, idx):
+ # Load data based on source
+ if self.data_source == 'h5':
+ data_dict = self._load_h5_data(idx)
+ else: # files
+ data_dict = self._load_file_data(idx)
+
+ # data processing
+ return self._process_data(data_dict)
+
+ def __len__(self):
+ return self.num_samples
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/utils/tableau_color.npy b/third_party/Puppeteer/skinning/utils/tableau_color.npy
new file mode 100644
index 0000000000000000000000000000000000000000..dd1b0b0cd4a3b95cc88556762ea4b0c53b0a2b21
--- /dev/null
+++ b/third_party/Puppeteer/skinning/utils/tableau_color.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:780ca107d438c61287fd1845248877371467ae8b74d7a3ab06669671da3addb9
+size 608
diff --git a/third_party/Puppeteer/skinning/utils/util.py b/third_party/Puppeteer/skinning/utils/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e932c696be1d463c5c2aa68614141904f8d6b1f
--- /dev/null
+++ b/third_party/Puppeteer/skinning/utils/util.py
@@ -0,0 +1,286 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+import cv2
+import json
+import trimesh
+import skimage.measure
+import mesh2sdf.core
+
+from utils.rig_parser import Info
+from collections import deque, defaultdict
+from scipy.cluster.hierarchy import linkage, fcluster
+
+def read_obj_file(file_path):
+ """Read OBJ file and return vertices and faces"""
+ vertices = []
+ faces = []
+
+ with open(file_path, 'r') as file:
+ for line in file:
+ if line.startswith('v '):
+ parts = line.split()[1:]
+ vertices.append([float(parts[0]), float(parts[1]), float(parts[2])])
+ elif line.startswith('f '):
+ parts = line.split()[1:]
+ face = [int(part.split('/')[0]) - 1 for part in parts]
+ faces.append(face)
+
+ return np.array(vertices), np.array(faces)
+
+def read_rig_file(file_path):
+ """Read rig file and return joints, bones, and root index"""
+ joints = []
+ bones = []
+ joint_mapping = {}
+ joint_index = 0
+
+ with open(file_path, 'r') as file:
+ lines = file.readlines()
+
+ for line in lines:
+ if line.startswith('joints'):
+ parts = line.split()
+ name = parts[1]
+ position = [float(parts[2]), float(parts[3]), float(parts[4])]
+ joints.append(position)
+ joint_mapping[name] = joint_index
+ joint_index += 1
+ elif line.startswith('hier'):
+ parts = line.split()
+ parent_joint = joint_mapping[parts[1]]
+ child_joint = joint_mapping[parts[2]]
+ bones.append([parent_joint, child_joint])
+ elif line.startswith('root'):
+ parts = line.split()
+ root = joint_mapping[parts[1]]
+
+ return np.array(joints), np.array(bones), root
+
+def normalize_to_unit_cube(vertices, scale_factor=1.0):
+ min_coords = vertices.min(axis=0)
+ max_coords = vertices.max(axis=0)
+ center = (max_coords + min_coords) / 2.0
+
+ vertices -= center
+ scale = 1.0 / np.abs(vertices).max() * scale_factor
+ vertices *= scale
+
+ return vertices, center, scale
+
+def build_adjacency_list(num_joints, bones):
+ """Build adjacency list for graph distance computation"""
+ adjacency = [[] for _ in range(num_joints)]
+ for (p, c) in bones:
+ adjacency[p].append(c)
+ adjacency[c].append(p)
+ return adjacency
+
+def compute_graph_distance(num_joints, adjacency):
+ """Compute graph distance using BFS"""
+ graph_dist = np.full((num_joints, num_joints), np.inf, dtype=np.float32)
+
+ for start in range(num_joints):
+ queue = deque()
+ queue.append((start, 0))
+ graph_dist[start, start] = 0.0
+
+ while queue:
+ current, dist = queue.popleft()
+ for nbr in adjacency[current]:
+ if graph_dist[start, nbr] == np.inf:
+ graph_dist[start, nbr] = dist + 1
+ queue.append((nbr, dist + 1))
+
+ return graph_dist
+
+def get_tpl_edges(vertices, faces):
+ """Get topology edges from mesh"""
+ edge_index = []
+ for v in range(len(vertices)):
+ face_ids = np.argwhere(faces == v)[:, 0]
+ neighbor_ids = []
+ for face_id in face_ids:
+ for v_id in range(3):
+ if faces[face_id, v_id] != v:
+ neighbor_ids.append(faces[face_id, v_id])
+ neighbor_ids = list(set(neighbor_ids))
+ neighbor_ids = [np.array([v, n])[np.newaxis, :] for n in neighbor_ids]
+ if len(neighbor_ids) == 0:
+ continue
+ neighbor_ids = np.concatenate(neighbor_ids, axis=0)
+ edge_index.append(neighbor_ids)
+
+ if edge_index:
+ edge_index = np.concatenate(edge_index, axis=0)
+ else:
+ edge_index = np.array([]).reshape(0, 2)
+ return edge_index
+
+def save_args(args, output_dir, filename="config.json"):
+ args_dict = vars(args)
+ os.makedirs(output_dir, exist_ok=True)
+ config_path = os.path.join(output_dir, filename)
+ with open(config_path, 'w') as f:
+ json.dump(args_dict, f, indent=4)
+
+def save_skin_weights_to_rig(rig_path, skin_weights, output_path):
+ """
+ save skinning weights to rig file, keeping the original joints, root and hier information unchanged.
+
+ parameters:
+ rig_path: original rig path
+ skin_weights: predicted skinning weights
+ output_path: output rig path
+ """
+
+ original_rig = Info(rig_path)
+
+ joints_name = list(original_rig.joint_pos.keys())
+
+ skin_lines = []
+ for v in range(len(skin_weights)):
+ vi_skin = [str(v)]
+ skw = skin_weights[v]
+ skw = skw / (np.sum(skw))
+
+ for i in range(len(skw)):
+ if i == len(joints_name):
+ break
+ if skw[i] > 1e-5:
+ bind_joint_name = joints_name[i]
+ bind_weight = skw[i]
+ vi_skin.append(bind_joint_name)
+ vi_skin.append(str(bind_weight))
+ skin_lines.append(vi_skin)
+
+ with open(rig_path, 'r') as f_in:
+ original_lines = f_in.readlines()
+
+ preserved_lines = []
+ for line in original_lines:
+        word = line.split()
+        # skip blank lines; keep only the joints/root/hier definitions
+        if word and word[0] in ['joints', 'root', 'hier']:
+ preserved_lines.append(line)
+
+ with open(output_path, 'w') as f_out:
+ for line in preserved_lines:
+ f_out.write(line)
+
+ for skw in skin_lines:
+ cur_line = 'skin {0} '.format(skw[0])
+ for cur_j in range(1, len(skw), 2):
+ cur_line += '{0} {1:.6f} '.format(skw[cur_j], float(skw[cur_j+1]))
+ cur_line += '\n'
+ f_out.write(cur_line)
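+
+# Illustrative output (joint names are hypothetical): each vertex becomes one line such as
+#   skin 12 pelvis 0.750000 spine 0.250000
+# i.e. the vertex index followed by (joint name, normalized weight) pairs for all
+# weights above 1e-5, appended after the preserved joints/root/hier lines.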
+
+def normalize_vertices(vertices, scale=0.9):
+ bbmin, bbmax = vertices.min(0), vertices.max(0)
+ center = (bbmin + bbmax) * 0.5
+ scale = 2.0 * scale / (bbmax - bbmin).max()
+ vertices = (vertices - center) * scale
+ return vertices, center, scale
+
+def export_to_watertight(normalized_mesh, octree_depth: int = 7):
+ """
+    Convert a (possibly non-watertight) mesh to a watertight one via an SDF and marching cubes.
+
+    Args:
+        normalized_mesh (trimesh.Trimesh): input mesh
+        octree_depth (int): resolution of the SDF grid (grid size = 2 ** octree_depth)
+
+    Returns:
+        mesh (trimesh.Trimesh): watertight mesh
+ """
+ size = 2 ** octree_depth
+ level = 2 / size
+
+ scaled_vertices, to_orig_center, to_orig_scale = normalize_vertices(normalized_mesh.vertices)
+
+ sdf = mesh2sdf.core.compute(scaled_vertices, normalized_mesh.faces, size=size)
+
+ vertices, faces, normals, _ = skimage.measure.marching_cubes(np.abs(sdf), level)
+
+ # watertight mesh
+ vertices = vertices / size * 2 - 1 # -1 to 1
+ vertices = vertices / to_orig_scale + to_orig_center
+    mesh = trimesh.Trimesh(vertices, faces, vertex_normals=normals)
+
+ return mesh
+
+def process_mesh_to_pc(mesh, marching_cubes=True, sample_num=4096):
+    """Sample a point cloud with per-point normals from a mesh, optionally making it watertight first."""
+    if marching_cubes:
+        mesh = export_to_watertight(mesh)
+        print("Marching cubes conversion done.")
+ return_mesh = mesh
+ points, face_idx = mesh.sample(sample_num, return_index=True)
+ points, _, _ = normalize_to_unit_cube(points, 0.9995)
+ normals = mesh.face_normals[face_idx]
+
+ pc_normal = np.concatenate([points, normals], axis=-1, dtype=np.float16)
+ return pc_normal, return_mesh
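+
+# Illustrative usage (the input file name is hypothetical; not part of the original code):
+#   mesh = trimesh.load("input.obj", force='mesh')
+#   pc_normal, watertight_mesh = process_mesh_to_pc(mesh, marching_cubes=True, sample_num=4096)
+#   # pc_normal has shape (4096, 6): xyz positions followed by face normals, as float16.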
+
+def post_filter(skin_weights, topology_edge, num_ring=1):
+ """
+ Post-process skinning weights by averaging over multi-ring neighbors.
+
+ Parameters:
+ skin_weights: (num_vertices, num_joints) array of skinning weights
+ topology_edge: (num_edges, 2) array of edges defining the mesh topology
+ num_ring: number of rings for neighbor averaging
+
+ Returns:
+        skin_weights_new: (num_vertices, num_joints) array of post-processed skinning weights
+ """
+ skin_weights_new = np.zeros_like(skin_weights)
+ num_vertices = skin_weights.shape[0]
+
+    adjacency_list = [[] for _ in range(num_vertices)]
+    # topology_edge (e.g. from get_tpl_edges) lists both directions of every edge,
+    # so adding only v1 -> v2 here still produces a symmetric adjacency list
+    for e in range(topology_edge.shape[0]):
+        v1, v2 = topology_edge[e, 0], topology_edge[e, 1]
+        adjacency_list[v1].append(v2)
+
+ for v in range(num_vertices):
+ adj_verts_multi_ring = set()
+ visited = {v}
+ current_ring = {v}
+
+ for r in range(num_ring):
+ next_ring = set()
+ for seed in current_ring:
+ for neighbor in adjacency_list[seed]:
+ if neighbor not in visited:
+ next_ring.add(neighbor)
+ visited.add(neighbor)
+
+ adj_verts_multi_ring.update(next_ring)
+ if not next_ring:
+ break
+
+ current_ring = next_ring
+
+ # calculate the average skinning weights
+ adj_verts_multi_ring.discard(v)
+ if adj_verts_multi_ring:
+ skin_weights_neighbor = skin_weights[list(adj_verts_multi_ring), :]
+ skin_weights_new[v, :] = np.mean(skin_weights_neighbor, axis=0)
+ else:
+ skin_weights_new[v, :] = skin_weights[v, :]
+
+ return skin_weights_new
\ No newline at end of file
diff --git a/third_party/Puppeteer/skinning/utils/visualize.py b/third_party/Puppeteer/skinning/utils/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c7547c6aef14ec7591527f88e3cdc98fda60474
--- /dev/null
+++ b/third_party/Puppeteer/skinning/utils/visualize.py
@@ -0,0 +1,182 @@
+# Copyright (c) 2021 Peizhuo Li
+# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates.
+# SPDX-License-Identifier: MIT
+#
+# This file has been modified by ByteDance Ltd. and/or its affiliates. on 2025.09.04
+#
+# Original file was released under MIT, with the full license text
+# available at https://github.com/PeizhuoLi/neural-blend-shapes/blob/main/LICENSE.
+#
+# This modified file is released under the same license.
+
+import bpy
+import numpy as np
+import random
+import os
+import matplotlib.pyplot as plt
+
+def generate_color_table(num_colors, base_color_path):
+ base_colors = np.load(base_color_path)
+ idx = list(range(base_colors.shape[0]))
+ random.seed(5)
+ random.shuffle(idx)
+ pt = 0
+ res = []
+ for i in range(num_colors):
+ res.append(base_colors[idx[pt]])
+ pt += 1
+ pt %= base_colors.shape[0]
+ return np.array(res)
+
+def weight2color(weight, colors):
+ res = np.matmul(weight, colors)
+ res = np.clip(res, 0, 1)
+ return res
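+
+# Illustrative example (not part of the original code): with colors of shape
+# (num_joints, 3) and weight of shape (num_vertices, num_joints), the matmul blends
+# joint colors per vertex; a vertex weighted 0.5/0.5 between a red (1, 0, 0) and a
+# blue (0, 0, 1) joint gets the color (0.5, 0.0, 0.5).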
+
+def export_obj_with_vertex_colors(obj_object, vertex_colors, filepath):
+ mesh = obj_object.data
+
+ with open(filepath, 'w') as f:
+ f.write("# Exported OBJ with vertex colors\n")
+ for v, color in zip(mesh.vertices, vertex_colors):
+ f.write("v {} {} {} {} {} {}\n".format(
+ v.co.x, v.co.y, v.co.z,
+ color[0], color[1], color[2]
+ ))
+
+ f.write("g {}\n".format(obj_object.name))
+ for poly in mesh.polygons:
+ vertices = [v + 1 for v in poly.vertices]
+ face = "f " + " ".join([str(v) for v in vertices]) + "\n"
+ f.write(face)
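+
+# Note: writing "v x y z r g b" puts per-vertex colors directly on the OBJ 'v' lines,
+# a common unofficial extension that viewers such as MeshLab can read.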
+
+def compute_error_map(weight_method, weight_gt):
+ error = np.abs(weight_method - weight_gt) # Shape: (n_vertex, n_color)
+ error_map = np.max(error, axis=1) # Taking the maximum error across all colors for each vertex
+ # error_map = np.mean(error, axis=1)
+    # Clamp error values to [0, 1] so they map directly onto the colormap
+ error_map = np.clip(error_map, 0, 1)
+ return error_map
+
+def error_map_to_color(error_map, colormap='Reds'):
+ cmap = plt.get_cmap(colormap)
+ colors = cmap(error_map)[:, :3] # Discard alpha channel
+ return colors
+
+def generate_color_bar(filepath, colormap='Reds'):
+ fig, ax = plt.subplots(figsize=(8, 1.5))
+ fig.subplots_adjust(bottom=0.5)
+
+ cmap = plt.get_cmap(colormap)
+ norm = plt.Normalize(vmin=0, vmax=1)
+ cb1 = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cmap),
+ cax=ax, orientation='horizontal')
+ cb1.ax.tick_params(labelsize=26)
+ plt.savefig(filepath, bbox_inches='tight')
+ plt.close()
+
+def main():
+ base_color_path = "utils/tableau_color.npy"
+
+ ### xlv2_test
+ obj_path = "xlv2_test_mesh" # save from npz files, check skeleton/data_utils/convert_npz_to_mesh_rig.py
+ weight_path1 = "gt_onxlv2_npy"
+ weight_path2 = "ours_onxlv2_npy"
+ # weight_path3 = "rignet_onxlv2_npy"
+ # weight_path4 = "gvb_onxlv2_npy"
+ save_path = "obj_w_skincolor_errormap_onxlv2_test"
+
+ color_bar_path = "error_color_bar.png" # Path to save the color bar image
+
+ if not os.path.exists(save_path):
+ os.makedirs(save_path)
+
+ generate_color_bar(color_bar_path, colormap='Reds')
+
+ bpy.ops.object.select_all(action='SELECT')
+ bpy.ops.object.delete(use_global=False)
+
+ for obj_file in os.listdir(obj_path):
+ obj_file_path = os.path.join(obj_path, obj_file)
+ wt_gt_file = os.path.join(weight_path1, obj_file.replace(".obj", "_skin.npy"))
+ wt_ours_file = os.path.join(weight_path2, obj_file.replace(".obj", "_skin.npy"))
+ # wt_rignet_file = os.path.join(weight_path3, obj_file.replace(".obj", "_skin.npy"))
+ # wt_gvb_file = os.path.join(weight_path4, obj_file.replace(".obj", "_skin.npy"))
+ if not os.path.exists(wt_gt_file) or not os.path.exists(wt_ours_file): # or not os.path.exists(wt_rignet_file) or not os.path.exists(wt_gvb_file):
+ continue
+
+        # Import the OBJ file
+ bpy.ops.wm.obj_import(filepath=obj_file_path)
+ imported_objects = bpy.context.selected_objects
+ if not imported_objects:
+ print("Failed to import OBJ file:", obj_file_path)
+ continue
+ obj = imported_objects[0]
+
+ weight_gt = np.load(wt_gt_file)
+ weight_ours = np.load(wt_ours_file)
+ # weight_rignet = np.load(wt_rignet_file)
+ # weight_gvb = np.load(wt_gvb_file)
+ if weight_gt.shape != weight_ours.shape: # or weight_gt.shape != weight_rignet.shape or weight_gt.shape != weight_gvb.shape:
+ print("Weight shape mismatch among files")
+ bpy.ops.object.select_all(action='SELECT')
+ bpy.ops.object.delete(use_global=False)
+ continue
+
+ if len(weight_gt.shape) != 2:
+ print("Weight file should be 2D array")
+ bpy.ops.object.select_all(action='SELECT')
+ bpy.ops.object.delete(use_global=False)
+ continue
+
+ n_vertices = len(obj.data.vertices)
+ if weight_gt.shape[0] != n_vertices:
+ print(f"Vertex count mismatch: OBJ has {n_vertices}, but weight file has {weight_gt.shape[0]}")
+ bpy.ops.object.select_all(action='SELECT')
+ bpy.ops.object.delete(use_global=False)
+ continue
+
+ # generate colors
+ n_color = weight_gt.shape[1]
+ colors = generate_color_table(n_color, base_color_path)
+ vertex_colors_gt = weight2color(weight_gt, colors)
+ vertex_colors_ours = weight2color(weight_ours, colors)
+ # vertex_colors_rignet = weight2color(weight_rignet, colors)
+ # vertex_colors_gvb = weight2color(weight_gvb, colors)
+
+ # Save obj with vertex colors
+ output_path_gt = os.path.join(save_path, obj_file.replace(".obj", "_gt.obj"))
+ output_path_ours = os.path.join(save_path, obj_file.replace(".obj", "_ours.obj"))
+ # output_path_rignet = os.path.join(save_path, obj_file.replace(".obj", "_rignet.obj"))
+ # output_path_gvb = os.path.join(save_path, obj_file.replace(".obj", "_gvb.obj"))
+ export_obj_with_vertex_colors(obj, vertex_colors_gt, output_path_gt)
+ export_obj_with_vertex_colors(obj, vertex_colors_ours, output_path_ours)
+ # export_obj_with_vertex_colors(obj, vertex_colors_rignet, output_path_rignet)
+ # export_obj_with_vertex_colors(obj, vertex_colors_gvb, output_path_gvb)
+
+ # Save object with error color map
+ # 1. Ours vs GT
+ error_ours = compute_error_map(weight_ours, weight_gt)
+ colors_error_ours = error_map_to_color(error_ours, colormap='Reds')
+ output_path_error_ours = os.path.join(save_path, obj_file.replace(".obj", "_error_ours.obj"))
+ export_obj_with_vertex_colors(obj, colors_error_ours, output_path_error_ours)
+
+ # 2. Rignet vs GT
+ # error_rignet = compute_error_map(weight_rignet, weight_gt)
+ # colors_error_rignet = error_map_to_color(error_rignet, colormap='Reds')
+ # output_path_error_rignet = os.path.join(save_path, obj_file.replace(".obj", "_error_rignet.obj"))
+ # export_obj_with_vertex_colors(obj, colors_error_rignet, output_path_error_rignet)
+
+ # 3. GVB vs GT
+ # error_gvb = compute_error_map(weight_gvb, weight_gt)
+ # colors_error_gvb = error_map_to_color(error_gvb, colormap='Reds')
+ # output_path_error_gvb = os.path.join(save_path, obj_file.replace(".obj", "_error_gvb.obj"))
+ # export_obj_with_vertex_colors(obj, colors_error_gvb, output_path_error_gvb)
+
+ bpy.ops.object.select_all(action='SELECT')
+ bpy.ops.object.delete(use_global=False)
+
+ print("Done")
+
+if __name__ == "__main__":
+ main()