# reachy_mini_remote_control
#
# Running
#
# File size: 62,365 Bytes

"""
Reachy Mini Controller
A centralized server that listens for Robot connections and hosts a Gradio control interface.
"""

import asyncio
import threading
import time
import queue
from dataclasses import dataclass
from typing import Optional, Tuple

import cv2
import gradio as gr
import numpy as np
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import StreamingResponse
import uvicorn
from fastrtc import WebRTC, StreamHandler, get_cloudflare_turn_credentials
from huggingface_hub import get_token

from reachy_mini.utils import create_head_pose

# Resolve the Hugging Face access token once at import time; it is passed as
# ``hf_token`` to every TURN credential request further down.
token = get_token()

# Fail fast at import when no token is available.
if not token:
    raise ValueError("No token found. Please set the HF_TOKEN environment variable or login to Hugging Face.")

# -------------------------------------------------------------------
# 1. Configuration
# -------------------------------------------------------------------

AUDIO_SAMPLE_RATE = 16000  # sample rate (Hz) used for all audio; matches the robot's ReSpeaker

# Audio queue configuration: maximum number of chunks kept per queue. When a
# queue is full the oldest chunks are dropped first (GlobalState._push_bounded).
MAX_AUDIO_QUEUE_SIZE = 2

# Movement step sizes
NUDGE_ANGLE = 5.0     # degrees for head roll / yaw
NUDGE_BODY = 0.3      # step for body_yaw. NOTE(review): labelled "degrees" originally, but body_yaw is sent to the robot without a deg->rad conversion and clamped to +/-3 - possibly radians; confirm
NUDGE_PITCH = 5.0     # degrees for pitch

# Video loop timing: sleep between iterations of the frame generators
FRAME_SLEEP_S = 0.04  # 25 fps

# TURN config: passed as ``ttl`` when requesting the server-side credentials.
# NOTE(review): the name says milliseconds, but the value is handed to
# get_cloudflare_turn_credentials(ttl=...) unchanged - confirm the unit the
# library expects.
TURN_TTL_SERVER_MS = 360_000

# Feature switches for WebRTC transport; their consumers are not visible in
# this part of the file.
USE_VIDEO_WEBRTC = False
USE_AUDIO_WEBRTC = False

# Fetch the two sets of Cloudflare TURN credentials at import time: one with the
# library's default TTL and one with the longer server-side TTL. The loop
# retries once per second until both requests have succeeded; a set that has
# already been obtained is not requested again.
turn_credentials = None
server_turn_credentials = None
while turn_credentials is None or server_turn_credentials is None:
    try:
        if turn_credentials is None:
            turn_credentials = get_cloudflare_turn_credentials(hf_token=token)
        if server_turn_credentials is None:
            server_turn_credentials = get_cloudflare_turn_credentials(ttl=TURN_TTL_SERVER_MS, hf_token=token)
    except Exception as e:
        # Tagged [TURN]: this failure is about credentials, not the video stream.
        print(f"[TURN] Error getting turn credentials: {e!r}")
        time.sleep(1)


# -------------------------------------------------------------------
# 2. Data Models
# -------------------------------------------------------------------

@dataclass
class Movement:
    """Target pose for the robot head, body and antennas, plus a move duration.

    ``send_pose_to_robot`` turns x/y/z and roll/pitch/yaw into a head pose with
    ``create_head_pose(..., degrees=True, mm=True)`` and converts the antenna
    values with ``np.deg2rad`` before sending them.
    """

    name: str  # label for the movement (e.g. "Current")
    # Head translation; passed to create_head_pose with mm=True.
    x: float = 0
    y: float = 0
    z: float = 0
    # Head orientation; passed to create_head_pose with degrees=True.
    roll: float = 0
    pitch: float = 0
    yaw: float = 0
    # Body rotation; forwarded to the robot unchanged (unit not converted here).
    body_yaw: float = 0
    # Antenna angles in degrees; antennas are only sent when both are set.
    left_antenna: Optional[float] = None
    right_antenna: Optional[float] = None
    # Forwarded as the movement's "duration" (presumably seconds - confirm).
    duration: float = 1.0


# -------------------------------------------------------------------
# 3. Global State
# -------------------------------------------------------------------

class GlobalState:
    """
    Singleton-style class to manage shared state between FastAPI (WebSockets)
    and Gradio (UI).
    """
    def __init__(self):
        # Connection handles
        self.robot_ws: Optional[WebSocket] = None
        self.robot_loop: Optional[asyncio.AbstractEventLoop] = None

        # Video Stream Data
        self.frame_lock = threading.Lock()
        self.black_frame = np.zeros((640, 640, 3), dtype=np.uint8)
        _, buffer = cv2.imencode(".jpg", self.black_frame)
        self.latest_frame_bytes = buffer.tobytes()
        self.latest_frame_ts = time.time()

        # Latency tracking for video
        self.video_latencies = []
        self.video_latency_window = 100  # Keep last 100 measurements

        # Audio from robot -> browser
        # Queue of (sample_rate: int, audio_bytes: bytes, timestamp: float)
        self.audio_queue: "queue.Queue[Tuple[int, bytes, float]]" = queue.Queue()

        # Audio from operator -> robot
        self.audio_to_robot_queue: "queue.Queue[bytes]" = queue.Queue()

        # Latency tracking for audio
        self.audio_latencies = []
        self.audio_latency_window = 100  # Keep last 100 measurements

        # Live pose state
        self.pose_lock = threading.Lock()
        self.current_pose = Movement(
            name="Current",
            x=0,
            y=0,
            z=0,
            roll=0,
            pitch=0,
            yaw=0,
            body_yaw=0,
            left_antenna=0,
            right_antenna=0,
            duration=0.2,
        )

    # --- Connection management ---

    def set_robot_connection(self, ws: WebSocket, loop: asyncio.AbstractEventLoop) -> None:
        self.robot_ws = ws
        self.robot_loop = loop

    def clear_robot_connection(self) -> None:
        self.robot_ws = None
        self.robot_loop = None

    # --- Video ---

    def update_frame(self, frame_bytes: bytes, robot_timestamp: Optional[float] = None) -> None:
        """
        Update the latest video frame.

        Args:
            frame_bytes: JPEG encoded frame data
            robot_timestamp: Optional timestamp from when the frame was captured on the robot
        """
        receive_time = time.time()
        with self.frame_lock:
            self.latest_frame_bytes = frame_bytes
            self.latest_frame_ts = receive_time

            # Calculate latency if timestamp provided
            if robot_timestamp is not None:
                latency_ms = (receive_time - robot_timestamp) * 1000
                self.video_latencies.append(latency_ms)

                # Keep only recent measurements
                if len(self.video_latencies) > self.video_latency_window:
                    self.video_latencies.pop(0)

    def get_video_latency_stats(self) -> dict:
        """Get video latency statistics in milliseconds."""
        if not self.video_latencies:
            return {"min": 0, "max": 0, "avg": 0, "latest": 0, "count": 0}

        return {
            "min": min(self.video_latencies),
            "max": max(self.video_latencies),
            "avg": sum(self.video_latencies) / len(self.video_latencies),
            "latest": self.video_latencies[-1],
            "count": len(self.video_latencies)
        }

    # --- Audio queues ---

    @staticmethod
    def _push_bounded(q: queue.Queue, item, max_size: int, description: str) -> None:
        while q.qsize() >= max_size:
            try:
                dropped = q.get_nowait()
                del dropped
            except queue.Empty:
                break
        q.put(item)

    def push_audio_from_robot(self, audio_bytes: bytes, robot_timestamp: Optional[float] = None) -> None:
        """
        Push audio data from robot to the queue for browser playback.

        Args:
            audio_bytes: Audio data bytes
            robot_timestamp: Optional timestamp from when audio was captured on robot
        """
        self._push_bounded(
            self.audio_queue,
            (AUDIO_SAMPLE_RATE, audio_bytes, robot_timestamp if robot_timestamp is not None else time.time()),
            MAX_AUDIO_QUEUE_SIZE,
            "FROM robot",
        )

    def push_audio_to_robot(self, audio_bytes: bytes) -> None:
        self._push_bounded(
            self.audio_to_robot_queue,
            audio_bytes,
            MAX_AUDIO_QUEUE_SIZE,
            "TO robot",
        )

    def get_audio_to_robot_blocking(self) -> bytes:
        try:
            return self.audio_to_robot_queue.get(timeout=0.2)
        except queue.Empty:
            return None

    def track_audio_latency(self, robot_timestamp: float) -> None:
        """Track audio latency when audio is about to be played."""
        playback_time = time.time()
        latency_ms = (playback_time - robot_timestamp) * 1000
        self.audio_latencies.append(latency_ms)

        # Keep only recent measurements
        if len(self.audio_latencies) > self.audio_latency_window:
            self.audio_latencies.pop(0)

    def get_audio_latency_stats(self) -> dict:
        """Get audio latency statistics in milliseconds."""
        if not self.audio_latencies:
            return {"min": 0, "max": 0, "avg": 0, "latest": 0, "count": 0}

        return {
            "min": min(self.audio_latencies),
            "max": max(self.audio_latencies),
            "avg": sum(self.audio_latencies) / len(self.audio_latencies),
            "latest": self.audio_latencies[-1],
            "count": len(self.audio_latencies)
        }

    # --- Status ---

    def get_connection_status(self) -> str:
        return "✅ Robot Connected" if self.robot_ws else "🔴 Waiting for Robot..."

    def get_latency_display(self) -> str:
        """Get formatted latency statistics for display."""
        video_stats = self.get_video_latency_stats()
        audio_stats = self.get_audio_latency_stats()

        lines = []

        if video_stats["count"] > 0:
            lines.append(
                f"📹 Video: {video_stats['latest']:.0f}ms "
                f"(avg: {video_stats['avg']:.0f}ms, max: {video_stats['max']:.0f}ms)"
            )

        if audio_stats["count"] > 0:
            lines.append(
                f"🎵 Audio: {audio_stats['latest']:.0f}ms "
                f"(avg: {audio_stats['avg']:.0f}ms, max: {audio_stats['max']:.0f}ms)"
            )

        if not lines:
            return "⏱️ Latency: Waiting for data..."

        return "\n".join(lines)

    # --- Pose management ---

    def update_pose(
        self,
        dx: float = 0,
        dy: float = 0,
        dz: float = 0,
        droll: float = 0,
        dpitch: float = 0,
        dyaw: float = 0,
        dbody_yaw: float = 0,
    ) -> Movement:
        with self.pose_lock:
            p = self.current_pose

            new = Movement(
                name="Current",
                x=p.x + dx,
                y=p.y + dy,
                z=p.z + dz,
                roll=p.roll + droll,
                pitch=p.pitch + dpitch,
                yaw=p.yaw + dyaw,
                body_yaw=p.body_yaw + dbody_yaw,
                left_antenna=p.left_antenna,
                right_antenna=p.right_antenna,
                duration=0.1,
            )

            # Clamp posed values
            new.pitch = float(np.clip(new.pitch, -30, 30))
            new.yaw = float(np.clip(new.yaw, -180, 180))
            new.roll = float(np.clip(new.roll, -40, 40))
            new.body_yaw = float(np.clip(new.body_yaw, -3, 3))
            new.z = float(np.clip(new.z, -20, 50))
            new.x = float(np.clip(new.x, -50, 50))
            new.y = float(np.clip(new.y, -50, 50))

            self.current_pose = new
            return new

    def reset_pose(self) -> Movement:
        with self.pose_lock:
            self.current_pose = Movement(
                name="Current",
                x=0,
                y=0,
                z=0,
                roll=0,
                pitch=0,
                yaw=0,
                body_yaw=0,
                left_antenna=0,
                right_antenna=0,
                duration=0.3,
            )
            return self.current_pose

    def get_pose_text(self) -> str:
        with self.pose_lock:
            p = self.current_pose
            return (
                "Head position:\n"
                f"  x={p.x:.1f}, y={p.y:.1f}, z={p.z:.1f}\n"
                f"  roll={p.roll:.1f}, pitch={p.pitch:.1f}, yaw={p.yaw:.1f}\n"
                "Body:\n"
                f"  body_yaw={p.body_yaw:.1f}"
            )


# Single shared instance used by the endpoints, handlers and UI helpers below.
state = GlobalState()


# -------------------------------------------------------------------
# 4. Robot commands
# -------------------------------------------------------------------

def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
    """
    Send a movement command to the connected robot over its WebSocket.

    The send is scheduled on the robot connection's event loop and this
    function returns without waiting for it; a failed send is logged when the
    scheduled call completes.

    Args:
        mov: Target pose. x/y/z and roll/pitch/yaw are converted with
            ``create_head_pose(..., degrees=True, mm=True)``; antennas are sent
            only when both values are set and are converted from degrees to
            radians, right antenna first.
        msg: Text used in the success status.

    Returns:
        Tuple ``(pose_text, status_text)``.
    """
    # Read both handles once: the connection may be cleared by another thread
    # between the check and the send.
    ws = state.robot_ws
    loop = state.robot_loop
    if not (ws and loop):
        return state.get_pose_text(), "⚠️ Robot not connected"

    pose = create_head_pose(
        x=mov.x,
        y=mov.y,
        z=mov.z,
        roll=mov.roll,
        pitch=mov.pitch,
        yaw=mov.yaw,
        degrees=True,
        mm=True,
    )

    payload = {
        "type": "movement",
        "movement": {
            "head": pose.tolist(),
            "body_yaw": mov.body_yaw,
            "duration": mov.duration,
        },
    }

    if mov.left_antenna is not None and mov.right_antenna is not None:
        payload["movement"]["antennas"] = [
            np.deg2rad(mov.right_antenna),
            np.deg2rad(mov.left_antenna),
        ]

    def _report_send_failure(fut) -> None:
        # Without this callback an exception raised by send_json would be
        # stored on the future and never surface anywhere.
        if fut.cancelled():
            return
        exc = fut.exception()
        if exc is not None:
            print(f"[System] Failed to send movement to robot: {exc!r}")

    future = asyncio.run_coroutine_threadsafe(ws.send_json(payload), loop)
    future.add_done_callback(_report_send_failure)

    return state.get_pose_text(), f"✅ {msg}"


# -------------------------------------------------------------------
# 5. Video streaming helpers
# -------------------------------------------------------------------

def generate_mjpeg_stream():
    """
    Yield multipart MJPEG parts for every new frame stored in ``state``.

    A frame is emitted only when its timestamp is newer than the last one
    sent; the loop sleeps ``FRAME_SLEEP_S`` between iterations either way and
    prints the outgoing frame rate roughly once per second.
    """
    sent_ts = 0.0
    sent_frames = 0
    window_start = time.time()

    while True:
        # Take a consistent (bytes, timestamp) pair.
        with state.frame_lock:
            jpeg = state.latest_frame_bytes
            jpeg_ts = state.latest_frame_ts

        has_new_frame = jpeg is not None and jpeg_ts > sent_ts
        if has_new_frame:
            sent_ts = jpeg_ts
            sent_frames += 1

            elapsed = time.time() - window_start
            if elapsed > 1.0:
                fps = sent_frames / elapsed
                print(f"[generate_mjpeg_stream] Current FPS: {fps:.2f}")
                sent_frames = 0
                window_start = time.time()

            header = b"--frame\r\n" b"Content-Type: image/jpeg\r\n\r\n"
            yield header + jpeg + b"\r\n"

        time.sleep(FRAME_SLEEP_S)


def webrtc_video_generator():
    """
    Generator for FastRTC WebRTC (mode='receive', modality='video').

    Yields a decoded image array on every iteration: the newest frame when
    one has arrived since the previous iteration, otherwise the previously
    yielded one. A frame that cannot be decoded is replaced by a black frame.
    """
    shown_ts = 0.0
    current = state.black_frame.copy()
    decoded_count = 0
    window_start = time.time()

    while True:
        with state.frame_lock:
            newest_ts = state.latest_frame_ts
            jpeg = state.latest_frame_bytes

        if newest_ts > shown_ts and jpeg:
            shown_ts = newest_ts
            image = cv2.imdecode(np.frombuffer(jpeg, dtype=np.uint8), cv2.IMREAD_COLOR)
            current = image if image is not None else state.black_frame.copy()

            decoded_count += 1
            elapsed = time.time() - window_start
            if elapsed > 1.0:
                fps = decoded_count / elapsed
                print(f"[webrtc_video_generator] Current FPS: {fps:.2f}")
                decoded_count = 0
                window_start = time.time()

        yield current
        time.sleep(FRAME_SLEEP_S)


# -------------------------------------------------------------------
# 6. FastAPI endpoints
# -------------------------------------------------------------------

# FastAPI application; the HTTP and WebSocket endpoints below are registered on it.
app = FastAPI()


@app.websocket("/robot")
async def robot_endpoint(ws: WebSocket):
    """
    Endpoint for the Robot to connect to (control channel).

    Registers the socket and its event loop in ``state`` so movement commands
    can be sent from other threads, then reads (and ignores) incoming messages
    until the robot disconnects.
    """
    await ws.accept()
    state.set_robot_connection(ws, asyncio.get_running_loop())
    print("[System] Robot Connected")

    try:
        while True:
            msg = await ws.receive()
            if msg.get("type") == "websocket.disconnect":
                break
    except WebSocketDisconnect:
        pass
    except Exception as e:
        print(f"[System] Robot connection error: {e!r}")
    finally:
        print("[System] Robot Disconnected")
        # Only clear the shared handles if they still belong to this socket;
        # a newer robot connection may have replaced them in the meantime.
        if state.robot_ws is ws:
            state.clear_robot_connection()


@app.get("/video_feed")
def video_feed():
    """Serve the latest robot frames as a multipart MJPEG stream."""
    mjpeg_parts = generate_mjpeg_stream()
    content_type = "multipart/x-mixed-replace; boundary=frame"
    return StreamingResponse(mjpeg_parts, media_type=content_type)


def generate_audio_stream():
    """
    Generator for Raw PCM audio (No WAV header).
    Sends raw Int16 bytes directly to the browser.

    Audio chunks are taken from ``state.audio_queue``, accumulated, and yielded
    in fixed pieces of 512 samples (1024 bytes). Bytes that do not fill a whole
    piece stay buffered until more audio arrives.
    """
    # Clear old data to start fresh
    with state.audio_queue.mutex:
        state.audio_queue.queue.clear()

    # OPTIMIZATION: Lower latency target (e.g., 512 samples = ~32ms)
    TARGET_SAMPLES = 512
    # 1024 bytes (512 samples, 2 bytes each) is a good balance for TCP packets
    chunk_size = TARGET_SAMPLES * 2
    byte_buffer = bytearray()

    while True:
        try:
            # Wait for data
            _, chunk_bytes, robot_timestamp = state.audio_queue.get(timeout=1.0)
        except queue.Empty:
            # Nothing arrived within the timeout; keep waiting.
            continue

        # Track latency
        if robot_timestamp is not None:
            state.track_audio_latency(robot_timestamp)

        if chunk_bytes:
            byte_buffer.extend(chunk_bytes)

        # Yield chunks suitable for the network
        while len(byte_buffer) >= chunk_size:
            out_bytes = bytes(byte_buffer[:chunk_size])
            # Remove the sent bytes in place instead of copying the remainder
            # into a new bytearray on every chunk.
            del byte_buffer[:chunk_size]
            yield out_bytes

@app.get("/audio_feed")
def audio_feed():
    """Endpoint for streaming Raw PCM."""
    response_headers = {
        "Cache-Control": "no-cache",
        "X-Content-Type-Options": "nosniff",
    }
    # Served as a generic byte stream rather than audio/wav: there is no WAV header.
    return StreamingResponse(
        generate_audio_stream(),
        media_type="application/octet-stream",
        headers=response_headers,
    )

@app.websocket("/video_stream")
async def stream_endpoint(ws: WebSocket):
    """
    Endpoint for Robot/Sim to send video frames.

    Expected message formats:
    1. Binary only (legacy): Just the JPEG frame bytes
    2. JSON with timestamp: {"timestamp": <float>, "frame": <base64 encoded frame>}

    Each frame is stored via ``state.update_frame``. A malformed text message
    is logged and skipped without closing the stream. The receiving frame rate
    is printed about once per second and latency statistics every 5 seconds.
    """
    # Imported once per connection instead of once per message.
    import base64
    import json

    await ws.accept()
    frame_count = 0
    start_time = time.time()
    latency_report_interval = 5.0  # Report latency stats every 5 seconds
    last_latency_report = time.time()

    try:
        while True:
            msg = await ws.receive()

            # Stop on the disconnect message; calling receive() again after it
            # would raise and be reported as an error.
            if msg.get("type") == "websocket.disconnect":
                print(f"[Video] Client disconnected: code={msg.get('code')}")
                break

            # Handle binary-only messages (legacy mode, no timestamp)
            data = msg.get("bytes")
            if data:
                state.update_frame(data, robot_timestamp=None)
                frame_count += 1

            # Handle text/JSON messages with timestamp
            text_data = msg.get("text")
            if text_data:
                try:
                    json_data = json.loads(text_data)
                    timestamp = json_data.get("timestamp")
                    frame_b64 = json_data.get("frame")
                    if frame_b64:
                        frame_bytes = base64.b64decode(frame_b64)
                        # Ignore timestamps that are not plain numbers so the
                        # latency arithmetic cannot fail on them.
                        if isinstance(timestamp, bool) or not isinstance(timestamp, (int, float)):
                            timestamp = None
                        state.update_frame(frame_bytes, robot_timestamp=timestamp)
                        frame_count += 1
                except (ValueError, TypeError, AttributeError) as e:
                    # ValueError covers invalid JSON and invalid base64;
                    # AttributeError/TypeError cover JSON that is not an object
                    # or a "frame" value that is not text.
                    print(f"[Video] Error parsing JSON: {e}")

            # FPS reporting
            elapsed = time.time() - start_time
            if elapsed > 1.0:
                fps = frame_count / elapsed
                print(f"[Video] Receiving FPS: {fps:.2f}")
                frame_count = 0
                start_time = time.time()

            # Latency reporting
            if time.time() - last_latency_report > latency_report_interval:
                stats = state.get_video_latency_stats()
                if stats["count"] > 0:
                    print(
                        f"[Video Latency] "
                        f"Latest: {stats['latest']:.1f}ms, "
                        f"Avg: {stats['avg']:.1f}ms, "
                        f"Min: {stats['min']:.1f}ms, "
                        f"Max: {stats['max']:.1f}ms "
                        f"(over {stats['count']} frames)"
                    )
                last_latency_report = time.time()

    except WebSocketDisconnect as e:
        print(f"[Video] WebSocketDisconnect: code={e.code}, reason={e.reason}")
    except asyncio.CancelledError:
        print("[Video] stream_endpoint cancelled")
    except Exception as e:
        print(f"[Video] stream_endpoint closed with error: {e!r}")
    finally:
        print("[Video] stream_endpoint closed (finally)")

@app.websocket("/audio_stream")
async def audio_endpoint(ws: WebSocket):
    """
    Full duplex audio channel between Robot/Sim and server.

    Expected message formats from robot:
    1. Binary only (legacy): Just the audio bytes
    2. JSON with timestamp: {"timestamp": <float>, "audio": <base64 encoded audio>}

    Two loops run concurrently: one stores audio received from the robot, the
    other forwards operator audio to the robot. As soon as either loop ends
    the other is cancelled, so no loop keeps consuming the operator audio
    queue after the robot has gone away.
    """
    # Imported once per connection instead of once per message.
    import base64
    import json

    await ws.accept()
    print("[Audio] Stream Connected")

    latency_report_interval = 5.0  # Report latency stats every 5 seconds
    last_latency_report = time.time()

    async def robot_to_server():
        """Receive audio from the robot and queue it for browser playback."""
        nonlocal last_latency_report
        try:
            while True:
                data = await ws.receive()
                t = data.get("type")
                if t == "websocket.disconnect":
                    print("[Audio] Disconnected (recv)")
                    break

                if t == "websocket.receive":
                    # Handle binary-only messages (legacy mode, no timestamp)
                    if data.get("bytes"):
                        state.push_audio_from_robot(data["bytes"], robot_timestamp=None)

                    # Handle JSON messages with timestamp
                    elif data.get("text"):
                        text_data = data.get("text")
                        if text_data == "ping":
                            print("[Audio] Received ping")
                        else:
                            try:
                                json_data = json.loads(text_data)
                                timestamp = json_data.get("timestamp")
                                audio_b64 = json_data.get("audio")
                                if audio_b64:
                                    audio_bytes = base64.b64decode(audio_b64)
                                    # Ignore timestamps that are not plain
                                    # numbers so latency tracking cannot fail.
                                    if isinstance(timestamp, bool) or not isinstance(timestamp, (int, float)):
                                        timestamp = None
                                    state.push_audio_from_robot(audio_bytes, robot_timestamp=timestamp)
                            except (ValueError, TypeError, AttributeError) as e:
                                # Invalid JSON / base64 or a non-object
                                # payload: report it and keep the stream alive.
                                print(f"[Audio] Error parsing JSON: {e}")

                # Latency reporting
                if time.time() - last_latency_report > latency_report_interval:
                    stats = state.get_audio_latency_stats()
                    if stats["count"] > 0:
                        print(
                            f"[Audio Latency] "
                            f"Latest: {stats['latest']:.1f}ms, "
                            f"Avg: {stats['avg']:.1f}ms, "
                            f"Min: {stats['min']:.1f}ms, "
                            f"Max: {stats['max']:.1f}ms "
                            f"(over {stats['count']} chunks)"
                        )
                    last_latency_report = time.time()

        except asyncio.CancelledError:
            print("[Audio] robot_to_server cancelled")
        except Exception as e:
            print(f"[Audio] robot_to_server error: {e}")

    async def server_to_robot():
        """Forward queued operator audio to the robot."""
        loop = asyncio.get_running_loop()
        try:
            while True:
                # The queue read blocks (up to 0.2 s), so it runs in the
                # default executor rather than on the event loop.
                chunk = await loop.run_in_executor(
                    None, state.get_audio_to_robot_blocking
                )
                if chunk is not None:
                    await ws.send_bytes(chunk)
        except asyncio.CancelledError:
            print("[Audio] server_to_robot cancelled")
        except Exception as e:
            print(f"[Audio] server_to_robot error: {e}")

    tasks = [
        asyncio.ensure_future(robot_to_server()),
        asyncio.ensure_future(server_to_robot()),
    ]
    try:
        # Return as soon as one direction finishes instead of waiting for both.
        await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
    except asyncio.CancelledError:
        print("[Audio] audio_endpoint cancelled")
    finally:
        for task in tasks:
            task.cancel()  # no-op for a task that has already finished
        await asyncio.gather(*tasks, return_exceptions=True)
        print("[Audio] Stream Closed")


@app.websocket("/browser_stream")
async def browser_stream_endpoint(ws: WebSocket):
    """
    Bi-directional connection for the Browser.
    - Sends Microphone data (Browser -> Robot)
    - Receives Speaker data (Robot -> Browser)

    Both directions run concurrently; when either one ends the other is
    cancelled so nothing keeps running for a browser that has disconnected.
    """
    await ws.accept()
    print("[Browser] WebSocket Connected")

    # Task: Send Audio FROM Robot TO Browser
    async def send_to_browser():
        while True:
            try:
                # Never block the event loop: take a chunk only if one is
                # available right now. Other consumers read the same queue, so
                # an "is it empty?" check followed by a blocking get could
                # stall or raise.
                _, chunk_bytes, robot_timestamp = state.audio_queue.get_nowait()
            except queue.Empty:
                await asyncio.sleep(0.005)  # Tiny sleep to prevent CPU burn
                continue

            # Track latency if we have a timestamp
            if robot_timestamp is not None:
                state.track_audio_latency(robot_timestamp)
            try:
                # Send as binary message
                await ws.send_bytes(chunk_bytes)
            except Exception:
                break

    # Task: Receive Audio FROM Browser TO Robot
    async def receive_from_browser():
        try:
            while True:
                data = await ws.receive_bytes()
                # Push directly to the robot's input queue
                state.push_audio_to_robot(data)
        except Exception as e:
            print(f"[Browser] Input stream ended: {e}")

    tasks = [
        asyncio.ensure_future(send_to_browser()),
        asyncio.ensure_future(receive_from_browser()),
    ]
    try:
        # Run both tasks concurrently until the first one finishes.
        done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            if not task.cancelled() and task.exception() is not None:
                print(f"[Browser] WebSocket Closed: {task.exception()}")
    except Exception as e:
        print(f"[Browser] WebSocket Closed: {e}")
    finally:
        for task in tasks:
            task.cancel()  # no-op for a task that has already finished
        await asyncio.gather(*tasks, return_exceptions=True)
        print("[Browser] Disconnected")

# -------------------------------------------------------------------
# 7. FastRTC audio handler
# -------------------------------------------------------------------

class RobotAudioHandler(StreamHandler):
    """
    FastRTC stream handler bridging browser WebRTC audio and the robot.

    - receive(): browser microphone frame -> state.audio_to_robot_queue
    - emit(): next chunk from state.audio_queue (robot audio) -> browser playback
    """

    def __init__(self) -> None:
        # Both directions use the same sample rate.
        super().__init__(
            input_sample_rate=AUDIO_SAMPLE_RATE,
            output_sample_rate=AUDIO_SAMPLE_RATE,
        )

    def copy(self) -> "RobotAudioHandler":
        """Return a fresh handler instance."""
        return RobotAudioHandler()

    def start_up(self) -> None:
        """No start-up work is needed."""
        pass

    def shutdown(self) -> None:
        """No shutdown work is needed."""
        pass

    def receive(self, frame: Tuple[int, np.ndarray]) -> None:
        """Convert an incoming microphone frame to int16 bytes and queue it for the robot."""
        if frame is None:
            return

        _, samples = frame
        if samples is None:
            return

        pcm = np.asarray(samples)

        # Ensure mono: keep only the first row of a multi-dimensional array.
        if pcm.ndim > 1:
            pcm = pcm[0]

        if pcm.dtype != np.int16:
            if np.issubdtype(pcm.dtype, np.floating):
                # Floating-point samples are limited to [-1, 1] and scaled to int16.
                pcm = (np.clip(pcm, -1.0, 1.0) * 32767.0).astype(np.int16)
            else:
                pcm = pcm.astype(np.int16)

        state.push_audio_to_robot(pcm.tobytes())

    def emit(self):
        """Return ``(sample_rate, audio)`` for the next robot chunk, or None
        when nothing arrives within 0.5 s."""
        try:
            rate, raw, captured_at = state.audio_queue.get(timeout=0.5)
        except queue.Empty:
            return None

        # Track latency if we have a timestamp
        if captured_at is not None:
            state.track_audio_latency(captured_at)

        # Interpret the bytes as int16 samples in a single row.
        return rate, np.frombuffer(raw, dtype=np.int16).reshape(1, -1)


# -------------------------------------------------------------------
# 8. Movement UI helpers
# -------------------------------------------------------------------

def get_pose_string():
    """Return the current pose as ``pitch:X,yaw:Y,roll:Z,body:B`` (one decimal
    each), the format the page's JavaScript parses."""
    with state.pose_lock:
        pose = state.current_pose
        labelled = (
            ("pitch", pose.pitch),
            ("yaw", pose.yaw),
            ("roll", pose.roll),
            ("body", pose.body_yaw),
        )
        return ",".join(f"{label}:{value:.1f}" for label, value in labelled)


def nudge_pose(dpitch=0, dyaw=0, droll=0, dbody_yaw=0, label="Move"):
    """Shift the current pose by the given deltas, send the result to the
    robot and return the updated pose string."""
    deltas = {
        "dpitch": dpitch,
        "dyaw": dyaw,
        "droll": droll,
        "dbody_yaw": dbody_yaw,
    }
    updated = state.update_pose(**deltas)
    send_pose_to_robot(updated, label)
    return get_pose_string()


def center_pose():
    """Reset the pose to neutral, send it to the robot and return the pose string."""
    send_pose_to_robot(state.reset_pose(), "Reset pose")
    return get_pose_string()


# -------------------------------------------------------------------
# 9. Gradio UI
# -------------------------------------------------------------------

# Stylesheet for the control page: dark theme overrides, header, status and
# latency boxes, video panel, control cards, audio section, the fixed-position
# keyboard (#keyboard-viz) and gauges (#gauges-viz) overlays, and the hidden
# #pose-state element kept in the DOM for JavaScript.
# NOTE(review): presumably passed to the Gradio UI built later in the file -
# the consumer is not visible in this part of the file.
CUSTOM_CSS = """
/* Dark theme overrides */
.gradio-container {
    background: linear-gradient(135deg, #0a0a0f 0%, #121218 100%) !important;
    min-height: 100vh;
}
.dark {
    --background-fill-primary: #12121a !important;
    --background-fill-secondary: #1a1a24 !important;
    --border-color-primary: #2a2a3a !important;
    --text-color-subdued: #888 !important;
}

/* Header styling */
#header-row {
    background: transparent !important;
    border: none !important;
    margin-bottom: 1rem;
    display: flex !important;
    justify-content: space-between !important;
    align-items: flex-start !important;
}
#app-title {
    font-size: 1.5rem !important;
    font-weight: 600 !important;
    background: linear-gradient(90deg, #fff, #888) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    border: none !important;
    padding: 0 !important;
    margin: 0 !important;
}

/* Status badge */
#status-box {
    flex-shrink: 0 !important;
    width: auto !important;
    max-width: 200px !important;
    min-width: 160px !important;
    background: rgba(16, 185, 129, 0.15) !important;
    border: 1px solid rgba(16, 185, 129, 0.4) !important;
    border-radius: 9999px !important;
    padding: 0.4rem 1rem !important;
    font-size: 0.875rem !important;
}
#status-box textarea {
    background: transparent !important;
    border: none !important;
    color: #10b981 !important;
    text-align: center !important;
    font-weight: 500 !important;
    padding: 0 !important;
    min-height: unset !important;
    height: auto !important;
    line-height: 1.4 !important;
}

/* Latency display */
#latency-box {
    flex-shrink: 0 !important;
    width: 100% !important;
    max-width: 100% !important;
    min-width: 300px !important;
    background: rgba(139, 92, 246, 0.15) !important;
    border: 1px solid rgba(139, 92, 246, 0.4) !important;
    border-radius: 0.75rem !important;
    padding: 0.5rem 1rem !important;
    font-size: 0.85rem !important;
    margin-top: 0.5rem !important;
}
#latency-box textarea {
    background: transparent !important;
    border: none !important;
    color: #a78bfa !important;
    text-align: left !important;
    font-weight: 500 !important;
    font-family: monospace !important;
    padding: 0 !important;
    min-height: 2.5rem !important;
    height: auto !important;
    max-height: 4rem !important;
    line-height: 1.4 !important;
    white-space: pre-wrap !important;
    overflow-y: auto !important;
    resize: none !important;
}
#latency-box textarea::-webkit-scrollbar {
    display: none !important;
}
#latency-box .scroll-hide {
    scrollbar-width: none !important;
}
#latency-box::-webkit-scrollbar {
    display: none !important;
}

/* Video panel */
#video-column {
    background: #0f0f14 !important;
    border-radius: 1rem !important;
    border: 1px solid #2a2a3a !important;
    overflow: hidden !important;
    min-height: 500px !important;
}
#robot-video {
    border-radius: 0.75rem !important;
    overflow: hidden !important;
}

/* Control panel cards */
.control-card {
    background: rgba(26, 26, 36, 0.8) !important;
    border: 1px solid #2a2a3a !important;
    border-radius: 0.75rem !important;
    padding: 1rem !important;
}

/* Audio section */
#audio-section {
    background: rgba(26, 26, 36, 0.8) !important;
    border: 1px solid #2a2a3a !important;
    border-radius: 0.75rem !important;
}
#listen-btn {
    background: rgba(139, 92, 246, 0.2) !important;
    border: 1px solid rgba(139, 92, 246, 0.3) !important;
    color: #a78bfa !important;
    border-radius: 0.5rem !important;
    transition: all 0.2s !important;
}
#listen-btn:hover {
    background: rgba(139, 92, 246, 0.3) !important;
}

/* Hide the default keyboard buttons */
#keyboard-buttons {
    display: none !important;
}

/* Quick action buttons */
.quick-btn {
    background: #1f1f2e !important;
    border: 1px solid #2a2a3a !important;
    border-radius: 0.5rem !important;
    padding: 0.5rem !important;
    font-size: 0.75rem !important;
    transition: all 0.2s !important;
}
.quick-btn:hover {
    background: #2a2a3a !important;
}

/* Keyboard visualization container */
#keyboard-viz {
    position: fixed;
    bottom: 3.5rem;
    right: 2rem;
    z-index: 1000;
    pointer-events: none;
}

/* Gauges container */
#gauges-viz {
    position: fixed;
    bottom: 3.5rem;
    left: 2rem;
    z-index: 1000;
    pointer-events: none;
}

/* Hide Gradio footer or make room for it */
footer {
    opacity: 0.5;
}

/* Hidden pose state (keep in DOM for JS) */
#pose-state {
    position: absolute !important;
    opacity: 0 !important;
    pointer-events: none !important;
    height: 0 !important;
    overflow: hidden !important;
}
"""

# Markup for the floating on-screen keyboard overlay.
# It renders three rows of key caps (Q/W/E, A/S/D, J/H/L). Each cap carries a
# `data-key` attribute; the page-load JavaScript in this file looks caps up via
# `.key[data-key="..."]` and toggles the `.active` class while a key is held.
# The inline <style> defines the resting and `.active` look of a cap; the
# `#keyboard-viz` wrapper itself is positioned (fixed, bottom-right,
# non-interactive) by the `#keyboard-viz` rule in the stylesheet string above.
KEYBOARD_VIZ_HTML = """
<div id="keyboard-viz">
  <div style="
    background: rgba(0,0,0,0.75);
    backdrop-filter: blur(12px);
    border: 1px solid rgba(255,255,255,0.15);
    border-radius: 16px;
    padding: 16px;
    display: flex;
    flex-direction: column;
    gap: 6px;
    align-items: center;
  ">
    <div style="display: flex; gap: 6px;">
      <div class="key" data-key="q">Q</div>
      <div class="key" data-key="w">W</div>
      <div class="key" data-key="e">E</div>
    </div>
    <div style="display: flex; gap: 6px;">
      <div class="key" data-key="a">A</div>
      <div class="key" data-key="s">S</div>
      <div class="key" data-key="d">D</div>
    </div>
    <div style="display: flex; gap: 6px; margin-top: 6px;">
      <div class="key" data-key="j">J</div>
      <div class="key" data-key="h">H</div>
      <div class="key" data-key="l">L</div>
    </div>
  </div>
</div>

<style>
  .key {
    width: 48px;
    height: 48px;
    background: linear-gradient(180deg, #3a3a4a 0%, #2a2a3a 100%);
    border: 1px solid #4a4a5a;
    border-radius: 8px;
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 14px;
    font-weight: 600;
    color: #ccc;
    font-family: system-ui, sans-serif;
    transition: all 0.1s;
  }
  .key.active {
    background: linear-gradient(180deg, #8b5cf6 0%, #7c3aed 100%);
    border-color: #a78bfa;
    color: white;
    box-shadow: 0 0 16px rgba(139, 92, 246, 0.6);
    transform: scale(0.95);
  }
</style>
"""

# Markup for the floating pose read-out: four half-circle SVG gauges labelled
# Pitch, Yaw, Roll and Body (Body is set apart by a thin vertical divider).
# Every gauge has the same three hooks, all tagged with `data-gauge="<name>"`:
#   * `.gauge-needle` - a <line> whose `transform="rotate(angle, 36, 42)"` is
#     rewritten by the page-load JavaScript (36,42 is the needle pivot);
#   * `.gauge-value`  - the numeric text below the dial, also rewritten by JS;
#   * `.gauge-arc`    - the coloured arc; the JavaScript visible in this file
#     does not modify it, so it keeps the static dash pattern set here.
# The `#gauges-viz` wrapper is positioned (fixed, bottom-left) by the
# `#gauges-viz` rule in the stylesheet string above.
GAUGES_HTML = """
<div id="gauges-viz">
  <div style="
    background: rgba(0,0,0,0.75);
    backdrop-filter: blur(12px);
    border: 1px solid rgba(255,255,255,0.15);
    border-radius: 16px;
    padding: 16px 20px;
    display: flex;
    gap: 24px;
  ">
    <div class="gauge-container">
      <svg width="72" height="48" viewBox="0 0 72 48">
        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#8b5cf6" stroke-width="5" stroke-linecap="round" 
              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="pitch"/>
        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
              class="gauge-needle" data-gauge="pitch" transform="rotate(0, 36, 42)"/>
        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
      </svg>
      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Pitch</div>
      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="pitch">0.0°</div>
    </div>
    <div class="gauge-container">
      <svg width="72" height="48" viewBox="0 0 72 48">
        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#06b6d4" stroke-width="5" stroke-linecap="round"
              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="yaw"/>
        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
              class="gauge-needle" data-gauge="yaw" transform="rotate(0, 36, 42)"/>
        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
      </svg>
      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Yaw</div>
      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="yaw">0.0°</div>
    </div>
    <div class="gauge-container">
      <svg width="72" height="48" viewBox="0 0 72 48">
        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#f59e0b" stroke-width="5" stroke-linecap="round"
              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="roll"/>
        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
              class="gauge-needle" data-gauge="roll" transform="rotate(0, 36, 42)"/>
        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
      </svg>
      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Roll</div>
      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="roll">0.0°</div>
    </div>
    <div style="width: 1px; background: #3a3a4a; margin: 0 4px;"></div>
    <div class="gauge-container">
      <svg width="72" height="48" viewBox="0 0 72 48">
        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#ec4899" stroke-width="5" stroke-linecap="round"
              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="body"/>
        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
              class="gauge-needle" data-gauge="body" transform="rotate(0, 36, 42)"/>
        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
      </svg>
      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Body</div>
      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="body">0.0°</div>
    </div>
  </div>
</div>
"""

# Page-load script used when audio is handled by the WebRTC component
# (`USE_AUDIO_WEBRTC`); otherwise APP_JS is loaded instead.
# It does three things in the browser:
#   1. maps W/S/A/D/Q/E/H/J/L key presses to clicks on the hidden Gradio
#      buttons (by elem_id) and highlights the matching on-screen key cap;
#   2. polls the hidden `#pose-state` textbox and moves the SVG gauge needles;
#   3. recolours `#status-box` depending on whether its text contains
#      "Connected".
# NOTE: this is a regular (non-raw) Python string, so the JavaScript regex
# escape `\d` is written with a doubled backslash below.
KEYBOARD_JS = """
() => {
    // Key -> elem_id of the hidden Gradio button whose click performs the move.
    const btnMap = {
        'w': 'btn-forward', 's': 'btn-back', 'a': 'btn-left', 'd': 'btn-right',
        'q': 'btn-tilt-up', 'e': 'btn-tilt-down', 'h': 'btn-center',
        'j': 'btn-body-left', 'l': 'btn-body-right',
    };
    // Own-property test so that only the keys listed above are ever handled.
    const isControlKey = (key) => Object.prototype.hasOwnProperty.call(btnMap, key);

    const lastPressed = {};
    const REPEAT_MS = 120;

    // Highlight or un-highlight one cap of the on-screen keyboard.
    const setKeyActive = (key, active) => {
        const keyEl = document.querySelector(`.key[data-key="${key}"]`);
        if (keyEl) keyEl.classList.toggle('active', active);
    };

    document.addEventListener('keydown', (ev) => {
        // Some synthetic events, such as browser autofill, carry no key.
        if (typeof ev.key !== 'string') return;
        // Leave browser and OS shortcuts like Ctrl+S or Cmd+L untouched.
        if (ev.ctrlKey || ev.metaKey || ev.altKey) return;

        const key = ev.key.toLowerCase();
        if (!isControlKey(key)) return;

        // Visual feedback
        setKeyActive(key, true);

        // Rate limit and trigger button
        const now = Date.now();
        if (lastPressed[key] && now - lastPressed[key] < REPEAT_MS) return;
        lastPressed[key] = now;
        ev.preventDefault();

        const btn = document.getElementById(btnMap[key]);
        if (btn) btn.click();
    });

    document.addEventListener('keyup', (ev) => {
        if (typeof ev.key !== 'string') return;
        const key = ev.key.toLowerCase();
        // Only known keys reach the selector; quote or backslash keys would
        // otherwise build an invalid CSS selector and throw.
        if (isControlKey(key)) setKeyActive(key, false);
    });

    // keyup is not delivered once the window loses focus, so release every
    // highlighted cap to avoid keys that look stuck.
    window.addEventListener('blur', () => {
        Object.keys(btnMap).forEach((key) => setKeyActive(key, false));
    });

    // Displayed range of each gauge as [min, max]; min maps to the far left
    // of the dial and max to the far right.
    const gaugeRanges = { pitch: [-30, 30], yaw: [-180, 180], roll: [-40, 40], body: [-3, 3] };

    // Watch pose-state textbox for changes and update gauges
    const updateGaugesFromState = () => {
        const poseEl = document.querySelector('#pose-state textarea');
        if (!poseEl) return;

        // Parse: "pitch:0.0,yaw:0.0,roll:0.0,body:0.0"
        const match = poseEl.value.match(/pitch:([\\d.-]+),yaw:([\\d.-]+),roll:([\\d.-]+),body:([\\d.-]+)/);
        if (!match) return;

        const values = {
            pitch: parseFloat(match[1]),
            yaw: parseFloat(match[2]),
            roll: parseFloat(match[3]),
            body: parseFloat(match[4]),
        };
        // The character class also accepts text such as a lone minus sign;
        // skip the update instead of writing NaN into the SVG.
        if (!Object.values(values).every(Number.isFinite)) return;

        Object.entries(gaugeRanges).forEach(([name, [min, max]]) => {
            const value = values[name];
            // Clamp to the dial so an out-of-range pose cannot swing the
            // needle below the baseline; the text still shows the real value.
            const normalized = Math.min(1, Math.max(0, (value - min) / (max - min)));
            const angle = (normalized - 0.5) * 180;

            const needle = document.querySelector(`.gauge-needle[data-gauge="${name}"]`);
            if (needle) needle.setAttribute('transform', `rotate(${angle}, 36, 42)`);

            const display = document.querySelector(`.gauge-value[data-gauge="${name}"]`);
            if (display) display.textContent = value.toFixed(1) + '°';
        });
    };

    // Poll for pose updates every 100ms
    setInterval(updateGaugesFromState, 100);

    // Update status box styling based on connection state
    const updateStatusStyle = () => {
        const statusBox = document.querySelector('#status-box');
        if (!statusBox) return;
        const textarea = statusBox.querySelector('textarea');
        if (!textarea) return;

        const isConnected = textarea.value.includes('Connected');
        if (isConnected) {
            statusBox.style.background = 'rgba(16, 185, 129, 0.15)';
            statusBox.style.borderColor = 'rgba(16, 185, 129, 0.4)';
            textarea.style.color = '#10b981';
        } else {
            statusBox.style.background = 'rgba(239, 68, 68, 0.15)';
            statusBox.style.borderColor = 'rgba(239, 68, 68, 0.4)';
            textarea.style.color = '#ef4444';
        }
    };
    setInterval(updateStatusStyle, 500);

    console.log('🎮 Keyboard controls ready');
}
"""

# Page-load script used when audio is NOT handled by the WebRTC component
# (`USE_AUDIO_WEBRTC` is false); it is passed to `demo.load(js=...)`.
# Section 1 implements two-way audio in the browser: microphone samples are
# converted to 16-bit PCM and sent over a WebSocket to `/browser_stream`, and
# binary frames received on the same socket are scheduled for playback. It
# exposes `window.refreshMicList`, `window.startAudioPlayer` and
# `window.stopMicCapture`; the first two are called from inline `onclick`
# handlers in the audio panel markup.
# Section 2 is the keyboard / gauge / status-box logic.
# NOTE: this is a regular (non-raw) Python string, so the JavaScript regex
# escape `\d` is written with a doubled backslash below.
APP_JS = """
() => {
    // ==========================================
    // 1. BI-DIRECTIONAL AUDIO WITH MIC SELECTION
    // ==========================================

    const SAMPLE_RATE = 16000;

    // Global handles to manage hot-swapping
    window.currentStream = null;  // MediaStream of the active microphone
    window.wsHandle = null;       // WebSocket carrying audio in both directions
    window.micNodes = null;       // source and processor nodes of the active capture graph

    // --- Helper: Release the active microphone capture ---
    // Works on the globals rather than on closed-over nodes, so it always
    // tears down the capture that is live now, also after a mic switch.
    window.stopMicCapture = function() {
        if (window.currentStream) {
            window.currentStream.getTracks().forEach(track => track.stop());
            window.currentStream = null;
        }
        if (window.micNodes) {
            window.micNodes.processor.onaudioprocess = null;
            window.micNodes.processor.disconnect();
            window.micNodes.source.disconnect();
            window.micNodes = null;
        }
    };

    // --- Helper: Populate Mic List ---
    window.refreshMicList = async function() {
        const select = document.getElementById('mic-select');
        if (!select) return;
        try {
            const devices = await navigator.mediaDevices.enumerateDevices();
            const audioInputs = devices.filter(device => device.kind === 'audioinput');

            const currentVal = select.value;
            select.innerHTML = ''; // Clear existing

            // Add Default option
            const defaultOpt = document.createElement('option');
            defaultOpt.value = "";
            defaultOpt.text = "Default Microphone";
            select.appendChild(defaultOpt);

            audioInputs.forEach(device => {
                const option = document.createElement('option');
                option.value = device.deviceId;
                // If label is empty, permission isn't granted yet
                option.text = device.label || `Microphone ${device.deviceId.slice(0,5)}...`;
                select.appendChild(option);
            });

            // Restore the selection only if that device is still listed;
            // otherwise fall back to the default entry instead of a blank box.
            const stillListed = audioInputs.some(device => device.deviceId === currentVal);
            select.value = (currentVal && stillListed) ? currentVal : "";

        } catch (e) {
            console.error("Error listing devices", e);
        }
    };

    // --- Helper: Wire playback and status handlers onto a new socket ---
    const attachSocketHandlers = (ws, ctx, status, btn) => {
        // Context time at which the next received chunk should start playing.
        let nextTime = 0;

        ws.onopen = () => {
            console.log("[Audio] WebSocket Open");
            status.innerText = "Status: 🟢 Connected";
            btn.innerText = "Microphone Active";
        };

        ws.onmessage = (event) => {
            // Only binary PCM frames are playable. Text frames, empty frames
            // and a trailing odd byte would make the typed array or the
            // audio buffer constructor throw.
            if (!(event.data instanceof ArrayBuffer)) return;
            const sampleCount = Math.floor(event.data.byteLength / 2);
            if (sampleCount === 0) return;

            const int16Data = new Int16Array(event.data, 0, sampleCount);
            const floatBuffer = ctx.createBuffer(1, sampleCount, SAMPLE_RATE);
            const channelData = floatBuffer.getChannelData(0);

            for (let i = 0; i < sampleCount; i++) {
                channelData[i] = (int16Data[i] / 32768.0);
            }

            const src = ctx.createBufferSource();
            src.buffer = floatBuffer;
            src.connect(ctx.destination);

            const now = ctx.currentTime;
            if (nextTime < now) nextTime = now;

            // Drop the backlog when playback has drifted too far behind.
            if (nextTime > now + 0.08) {
                console.log("Catching up audio latency...");
                nextTime = now;
            }
            src.start(nextTime);
            nextTime += floatBuffer.duration;
        };

        ws.onerror = (e) => {
            console.error("WS Error", e);
            status.innerText = "Status: WebSocket Error";
            btn.disabled = false;
        };

        ws.onclose = () => {
            // A socket that has already been replaced must not tear down the
            // capture that belongs to its successor.
            if (window.wsHandle !== ws) return;
            status.innerText = "Status: Disconnected";
            btn.disabled = false;
            btn.innerText = "▶️ Reconnect";
            window.stopMicCapture();
        };
    };

    window.startAudioPlayer = async function() {
        const btn = document.getElementById('start-stream-btn');
        const status = document.getElementById('audio-status');
        const micSelect = document.getElementById('mic-select');
        if (!btn || !status || !micSelect) {
            console.error("[Audio] Audio controls not found in the page");
            return;
        }

        console.log("[Audio] Starting Bi-Directional Stream...");

        // Disable before the first await so a double click cannot start two captures.
        btn.disabled = true;

        try {
            // --- A. Setup Audio Context ---
            const AudioContext = window.AudioContext || window.webkitAudioContext;
            if (!window.audioCtx) {
                window.audioCtx = new AudioContext({ sampleRate: SAMPLE_RATE });
            }
            const ctx = window.audioCtx;
            if (ctx.state === 'suspended') await ctx.resume();

            status.innerText = "Status: Requesting Mic...";

            // Release whatever a previous start left behind.
            window.stopMicCapture();

            // --- B. Get Microphone (Input) ---
            // Check dropdown for specific device ID
            const selectedMicId = micSelect.value;
            const constraints = {
                audio: {
                    deviceId: selectedMicId ? { exact: selectedMicId } : undefined,
                    channelCount: 1,
                    sampleRate: SAMPLE_RATE,
                    echoCancellation: true,
                    noiseSuppression: true,
                    autoGainControl: true
                }
            };

            const stream = await navigator.mediaDevices.getUserMedia(constraints);
            window.currentStream = stream; // Save global ref

            // Refresh list now that we have permission (to show labels)
            await window.refreshMicList();
            if (selectedMicId) micSelect.value = selectedMicId;

            status.innerText = "Status: Connecting WS...";

            // --- C. Setup WebSocket ---
            // Reuse the socket when it is open or still connecting; opening
            // a second one next to a connecting socket would leak the first.
            let ws = window.wsHandle;
            const reusable = !!ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING);
            if (!reusable) {
                const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
                const wsUrl = `${protocol}//${window.location.host}/browser_stream`;
                ws = new WebSocket(wsUrl);
                ws.binaryType = 'arraybuffer';
                window.wsHandle = ws;
                // --- E. Setup Output (WS -> Speaker), once per socket ---
                attachSocketHandlers(ws, ctx, status, btn);
            }

            // --- D. Setup Input Processor (Mic -> WS) ---
            const source = ctx.createMediaStreamSource(stream);
            // BufferSize 1024 provides a good balance of latency vs stability
            const processor = ctx.createScriptProcessor(1024, 1, 1);

            processor.onaudioprocess = (e) => {
                if (ws.readyState !== WebSocket.OPEN) return;

                const inputData = e.inputBuffer.getChannelData(0);

                // Convert Float32 (-1.0 to 1.0) -> Int16 (-32768 to 32767)
                const int16Buffer = new Int16Array(inputData.length);
                for (let i = 0; i < inputData.length; i++) {
                    let s = Math.max(-1, Math.min(1, inputData[i]));
                    int16Buffer[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
                }

                ws.send(int16Buffer.buffer);
            };

            source.connect(processor);
            // The processor only fires while it is connected to a destination.
            processor.connect(ctx.destination);
            window.micNodes = { source: source, processor: processor };

            // A socket that is already open will not fire onopen again, so
            // update the UI here; a connecting socket updates it in onopen.
            if (ws.readyState === WebSocket.OPEN) {
                status.innerText = "Status: 🟢 Connected (Mic Switched)";
                btn.innerText = "Microphone Active";
            }

            // Handle Mic Switching
            micSelect.onchange = async () => {
                console.log("Switching microphone...");
                status.innerText = "Status: Switching Mic...";

                // Stop the current capture, then restart with the new dropdown value.
                window.stopMicCapture();
                await window.startAudioPlayer();
            };

        } catch (err) {
            console.error("[Audio] Setup Error:", err);
            status.innerText = "Error: " + err.message;
            btn.disabled = false;
            // Do not leave the microphone live after a failed setup.
            window.stopMicCapture();
        }
    };

    // Attempt to list mics on load (will likely have empty labels until permission)
    setTimeout(window.refreshMicList, 1000);


    // ==========================================
    // 2. KEYBOARD & GAUGE LOGIC
    // ==========================================

    // Key -> elem_id of the hidden Gradio button whose click performs the move.
    const btnMap = {'w':'btn-forward','s':'btn-back','a':'btn-left','d':'btn-right','q':'btn-tilt-up','e':'btn-tilt-down','h':'btn-center','j':'btn-body-left','l':'btn-body-right'};
    const isControlKey = (key) => Object.prototype.hasOwnProperty.call(btnMap, key);
    const lastPressed = {};
    const REPEAT_MS = 120;

    // Highlight or un-highlight one cap of the on-screen keyboard.
    const setKeyActive = (key, active) => {
        const keyEl = document.querySelector(`.key[data-key="${key}"]`);
        if (keyEl) keyEl.classList.toggle('active', active);
    };

    document.addEventListener('keydown', (ev) => {
        // Some synthetic events, such as browser autofill, carry no key.
        if (typeof ev.key !== 'string') return;
        // Leave browser and OS shortcuts like Ctrl+S or Cmd+L untouched.
        if (ev.ctrlKey || ev.metaKey || ev.altKey) return;
        const key = ev.key.toLowerCase();
        if (!isControlKey(key)) return;
        setKeyActive(key, true);
        const now = Date.now();
        if (lastPressed[key] && now - lastPressed[key] < REPEAT_MS) return;
        lastPressed[key] = now;
        ev.preventDefault();
        const btn = document.getElementById(btnMap[key]);
        if (btn) btn.click();
    });
    document.addEventListener('keyup', (ev) => {
        if (typeof ev.key !== 'string') return;
        const key = ev.key.toLowerCase();
        // Only known keys reach the selector; quote or backslash keys would
        // otherwise build an invalid CSS selector and throw.
        if (isControlKey(key)) setKeyActive(key, false);
    });
    // keyup is not delivered once the window loses focus.
    window.addEventListener('blur', () => {
        Object.keys(btnMap).forEach((key) => setKeyActive(key, false));
    });

    // Displayed range of each gauge as [min, max].
    const gaugeRanges = { pitch: [-30, 30], yaw: [-180, 180], roll: [-40, 40], body: [-3, 3] };
    const updateGaugesFromState = () => {
        const poseEl = document.querySelector('#pose-state textarea');
        if (!poseEl) return;
        // Parse: "pitch:0.0,yaw:0.0,roll:0.0,body:0.0"
        const match = poseEl.value.match(/pitch:([\\d.-]+),yaw:([\\d.-]+),roll:([\\d.-]+),body:([\\d.-]+)/);
        if (!match) return;
        const values = { pitch: parseFloat(match[1]), yaw: parseFloat(match[2]), roll: parseFloat(match[3]), body: parseFloat(match[4]) };
        // Skip malformed numbers instead of writing NaN into the SVG.
        if (!Object.values(values).every(Number.isFinite)) return;
        Object.entries(gaugeRanges).forEach(([name, [min, max]]) => {
            // Clamp to the dial; the text below still shows the real value.
            const normalized = Math.min(1, Math.max(0, (values[name] - min) / (max - min)));
            const angle = (normalized - 0.5) * 180;
            const needle = document.querySelector(`.gauge-needle[data-gauge="${name}"]`);
            if (needle) needle.setAttribute('transform', `rotate(${angle}, 36, 42)`);
            const display = document.querySelector(`.gauge-value[data-gauge="${name}"]`);
            if (display) display.textContent = values[name].toFixed(1) + '°';
        });
    };
    setInterval(updateGaugesFromState, 100);

    const updateStatusStyle = () => {
        const statusBox = document.querySelector('#status-box');
        if (!statusBox || !statusBox.querySelector('textarea')) return;
        const textarea = statusBox.querySelector('textarea');
        const isConnected = textarea.value.includes('Connected');
        statusBox.style.background = isConnected ? 'rgba(16, 185, 129, 0.15)' : 'rgba(239, 68, 68, 0.15)';
        statusBox.style.borderColor = isConnected ? 'rgba(16, 185, 129, 0.4)' : 'rgba(239, 68, 68, 0.4)';
        textarea.style.color = isConnected ? '#10b981' : '#ef4444';
    };
    setInterval(updateStatusStyle, 500);
    console.log('🎮 Controls & Mic Select Ready');
}
"""

# -------------------------------------------------------------------
# Gradio UI with new styling
# -------------------------------------------------------------------

# Layout of the control page.
#   * header row: title plus two auto-refreshing textboxes (connection status
#     every 2 s, latency every 1 s);
#   * left column: hidden pose textbox (refreshed every 0.2 s, read by the page
#     JavaScript), audio panel, quick-action buttons and the hidden per-key
#     buttons that the keyboard JavaScript clicks;
#   * right column: robot video, either a WebRTC component or an <img> that
#     points at /video_feed;
#   * two floating overlays (keyboard caps and gauges).
with gr.Blocks(
    title="Reachy Controller",
    theme=gr.themes.Base(
        primary_hue="violet",
        neutral_hue="slate",
    ),
    css=CUSTOM_CSS,
) as demo:

    # Header
    with gr.Row(elem_id="header-row"):
        gr.Markdown("## 🤖 Reachy Mini", elem_id="app-title")
        with gr.Column(scale=1):
            status_box = gr.Textbox(
                value=state.get_connection_status,
                every=2,
                show_label=False,
                container=False,
                elem_id="status-box",
            )
            latency_box = gr.Textbox(
                value=state.get_latency_display,
                every=1,
                show_label=False,
                container=False,
                elem_id="latency-box",
            )

    with gr.Row():
        # Left column - Controls
        with gr.Column(scale=1):
            # Hidden pose state textbox - polls pose for JS gauges
            pose_state = gr.Textbox(
                value=get_pose_string,
                every=0.2,
                show_label=False,
                container=False,
                elem_id="pose-state",
            )

            # The "Start Listening" button only exists in the WebRTC-audio
            # branch. Keep the name bound either way so the video section can
            # check for it instead of raising NameError.
            listen_btn = None

            # Audio section
            with gr.Group(elem_id="audio-section"):
                gr.Markdown("### 🎧 Audio")
                if USE_AUDIO_WEBRTC:
                    listen_btn = gr.Button("🎤 Start Listening", elem_id="listen-btn")
                    robot_audio = WebRTC(
                        label="",
                        modality="audio",
                        mode="send-receive",
                        rtc_configuration=turn_credentials,
                        server_rtc_configuration=server_turn_credentials,
                        full_screen=False,
                    )
                    robot_audio.stream(
                        fn=RobotAudioHandler(),
                        inputs=[robot_audio],
                        outputs=[robot_audio],
                        trigger=listen_btn.click,
                    )
                else:
                    # HTML with Microphone Select Dropdown. The inline onclick
                    # handlers call functions that APP_JS installs on `window`.
                    audio_player_html = """
                    <div style="padding: 10px; background: rgba(0,0,0,0.2); border-radius: 8px;">
                        
                        <div style="margin-bottom: 8px; display: flex; gap: 8px;">
                            <select id="mic-select" style="
                                background: #1f1f2e; 
                                color: #ccc; 
                                border: 1px solid #3a3a4a; 
                                padding: 6px; 
                                border-radius: 4px; 
                                flex-grow: 1;
                                outline: none;">
                                <option value="">Default Microphone</option>
                            </select>
                            
                            <button onclick="window.refreshMicList()" style="
                                background: #2a2a3a; 
                                border: 1px solid #3a3a4a; 
                                color: #aaa; 
                                border-radius: 4px; 
                                cursor: pointer; 
                                padding: 0 8px;" title="Refresh Device List">
                                🔄
                            </button>
                        </div>

                        <button id="start-stream-btn" onclick="window.startAudioPlayer()" 
                            style="background: #7c3aed; color: white; border: none; padding: 8px 16px; border-radius: 4px; cursor: pointer; width: 100%;">
                            ▶️ Click to Start Audio Stream
                        </button>
                        
                        <div id="audio-status" style="margin-top: 8px; font-size: 12px; color: #aaa;">Status: Stopped</div>
                    </div>
                    """
                    gr.HTML(value=audio_player_html, label="🎵 Robot Audio")

            # Quick actions
            with gr.Group(elem_classes="control-card"):
                gr.Markdown("### ⚡ Quick Actions")
                with gr.Row():
                    btn_center_quick = gr.Button("🏠 Center", elem_classes="quick-btn")
                    btn_look_up = gr.Button("👀 Look Up", elem_classes="quick-btn")
                with gr.Row():
                    btn_curious = gr.Button("🎭 Curious", elem_classes="quick-btn")
                    btn_excited = gr.Button("🎉 Excited", elem_classes="quick-btn")

            # Hidden keyboard buttons (still needed for JS clicks). The group
            # is hidden by the `#keyboard-buttons` CSS rule; the elem_ids must
            # match the ids the keyboard JavaScript looks up.
            with gr.Group(elem_id="keyboard-buttons"):
                btn_forward = gr.Button("W", elem_id="btn-forward")
                btn_back = gr.Button("S", elem_id="btn-back")
                btn_left = gr.Button("A", elem_id="btn-left")
                btn_right = gr.Button("D", elem_id="btn-right")
                btn_tilt_up = gr.Button("Q", elem_id="btn-tilt-up")
                btn_tilt_down = gr.Button("E", elem_id="btn-tilt-down")
                btn_body_left = gr.Button("J", elem_id="btn-body-left")
                btn_body_right = gr.Button("L", elem_id="btn-body-right")
                btn_center = gr.Button("H", elem_id="btn-center")

            # Wire up hidden buttons - outputs required for Gradio to execute!
            # W/S change pitch, A/D change yaw (two angle steps per press),
            # Q/E change roll, J/L change body yaw, H re-centres.
            btn_forward.click(
                lambda: nudge_pose(dpitch=-NUDGE_PITCH, label="W"),
                outputs=[pose_state],
            )
            btn_back.click(
                lambda: nudge_pose(dpitch=NUDGE_PITCH, label="S"),
                outputs=[pose_state],
            )
            btn_left.click(
                lambda: nudge_pose(dyaw=NUDGE_ANGLE * 2, label="A"),
                outputs=[pose_state],
            )
            btn_right.click(
                lambda: nudge_pose(dyaw=-NUDGE_ANGLE * 2, label="D"),
                outputs=[pose_state],
            )
            btn_tilt_up.click(
                lambda: nudge_pose(droll=-NUDGE_ANGLE, label="Q"),
                outputs=[pose_state],
            )
            btn_tilt_down.click(
                lambda: nudge_pose(droll=NUDGE_ANGLE, label="E"),
                outputs=[pose_state],
            )
            btn_body_left.click(
                lambda: nudge_pose(dbody_yaw=NUDGE_BODY, label="J"),
                outputs=[pose_state],
            )
            btn_body_right.click(
                lambda: nudge_pose(dbody_yaw=-NUDGE_BODY, label="L"),
                outputs=[pose_state],
            )
            btn_center.click(center_pose, outputs=[pose_state])

            # Wire up quick action buttons
            btn_center_quick.click(center_pose, outputs=[pose_state])
            btn_look_up.click(
                lambda: nudge_pose(dpitch=-15, label="Look Up"),
                outputs=[pose_state],
            )
            btn_curious.click(
                lambda: nudge_pose(dpitch=-10, droll=15, label="Curious"),
                outputs=[pose_state],
            )
            btn_excited.click(
                lambda: nudge_pose(dpitch=-5, droll=-10, label="Excited"),
                outputs=[pose_state],
            )

        # Right column - Video
        with gr.Column(scale=2, elem_id="video-column"):
            if USE_VIDEO_WEBRTC:
                robot_video = WebRTC(
                    label="",
                    modality="video",
                    mode="receive",
                    rtc_configuration=turn_credentials,
                    server_rtc_configuration=server_turn_credentials,
                    elem_id="robot-video",
                )
                # The audio start button doubles as the video trigger when it
                # exists. With WebRTC video but non-WebRTC audio there is no
                # such button, so give the video stream its own.
                video_trigger_btn = listen_btn
                if video_trigger_btn is None:
                    video_trigger_btn = gr.Button(
                        "🎬 Start Video", elem_id="video-start-btn"
                    )
                robot_video.stream(
                    fn=webrtc_video_generator,
                    inputs=[],
                    outputs=[robot_video],
                    trigger=video_trigger_btn.click,
                )
            else:
                html_code = """
                    <html>
                    <body>
                        <img src="/video_feed" style="width: 100%; max-width: 1080px; border-radius: 8px;">
                    </body>
                    </html>
                    """
                gr.HTML(value=html_code, label="🎬 Robot Video")

    # Floating keyboard visualization
    gr.HTML(KEYBOARD_VIZ_HTML)
    gr.HTML(GAUGES_HTML)

    # Load keyboard handler (APP_JS additionally carries the browser audio
    # client, which is only needed when audio does not go through WebRTC).
    demo.load(None, None, None, js=KEYBOARD_JS if USE_AUDIO_WEBRTC else APP_JS)

# -------------------------------------------------------------------
# 10. Mount & run
# -------------------------------------------------------------------

# Attach the Gradio interface at the root path of the `app` object created
# earlier in this file; the name is rebound to the value Gradio returns.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Startup hints for the operator, printed before uvicorn takes over.
    for startup_line in (
        "🚀 Server starting on http://0.0.0.0:7860",
        "ℹ️ Point your Robot/Sim to: ws://<YOUR_PC_IP>:7860/robot",
    ):
        print(startup_line)

    # Listen on all interfaces; proxy headers are accepted from any address.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        proxy_headers=True,
        forwarded_allow_ips="*",
        log_level="warning",
    )