andito HF Staff committed on
Commit 113c326 · verified · Parent(s): 156b337

Update app.py

Files changed (1):
  1. app.py +703 -575

app.py CHANGED
@@ -1,16 +1,14 @@
 """
-Reachy Mini Controller - Cleaned Version
 A centralized server that listens for Robot connections and hosts a Gradio control interface.
 """
 
 import asyncio
-import io
-import json
 import threading
 import time
 import queue
 from dataclasses import dataclass
-from typing import List, Optional
 
 import cv2
 import gradio as gr
@@ -18,24 +16,40 @@ import numpy as np
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 from fastapi.responses import StreamingResponse
 import uvicorn
-from fastrtc import WebRTC, StreamHandler, get_cloudflare_turn_credentials_async, get_cloudflare_turn_credentials
 
-# Try to import the utility, handle error if running in standalone test without library
-try:
-    from reachy_mini.utils import create_head_pose
-except ImportError:
-    print("⚠️ Warning: reachy_mini module not found. Mocking create_head_pose for testing.")
-    def create_head_pose(**kwargs): return np.array([0,0,0])
 
 AUDIO_SAMPLE_RATE = 16000  # respeaker samplerate
 
-import os
-async def get_credentials():
-    # Will use HF_TOKEN env var inside the Space. There is a limit of 10GB per month: https://fastrtc.org/deployment/
-    return await get_cloudflare_turn_credentials_async(hf_token=os.getenv("HF_TOKEN"))
 
 
-# --- 1. Data Models & Presets ---
 
 @dataclass
 class Movement:
     name: str
@@ -50,110 +64,109 @@ class Movement:
     right_antenna: Optional[float] = None
     duration: float = 1.0
 
-PRESETS = {
-    "Home": Movement("Home", 0, 0, 0, 0, 0, 0, 0, 0, 0),
-    "Look Left": Movement("Look Left", 0, 0, 0, 0, 0, 30, 1, 0, 0),
-    "Look Right": Movement("Look Right", 0, 0, 0, 0, 0, -30, -1, 0, 0),
-    "Look Up": Movement("Look Up", 0, 0, 0, 0, -20, 0, 0, 0, 0),
-    "Look Down": Movement("Look Down", 0, 0, 0, 0, 15, 0, 0, 0, 0),
-    "Curious": Movement("Curious", 10, 0, 10, 15, -10, -15, 0, 45, -45),
-    "Excited": Movement("Excited", 0, 0, 20, 0, -15, 0, 0, 90, 90),
-    "Shy": Movement("Shy", -10, 0, -10, 10, 10, 20, 0, -30, 30),
-}
 
-SEQUENCES = {
-    "Wave": ["Home", "Look Left", "Look Right", "Look Left", "Look Right", "Home"],
-    "Nod": ["Home", "Look Down", "Look Up", "Look Down", "Home"],
-    "Excited Dance": ["Home", "Excited", "Look Left", "Look Right", "Home"],
-}
 
-# --- 2. Global State Management ---
 class GlobalState:
     """
     Singleton-style class to manage shared state between FastAPI (WebSockets)
-    and Gradio (UI Thread).
     """
     def __init__(self):
         # Connection handles
         self.robot_ws: Optional[WebSocket] = None
         self.robot_loop: Optional[asyncio.AbstractEventLoop] = None
-
         # Video Stream Data
         self.frame_lock = threading.Lock()
         self.black_frame = np.zeros((640, 640, 3), dtype=np.uint8)
-        _, buffer = cv2.imencode('.jpg', self.black_frame)
         self.latest_frame_bytes = buffer.tobytes()
         self.latest_frame_ts = time.time()
 
         # Audio from robot -> browser
-        # Queue of (sample_rate: int, audio: np.ndarray[int16, shape=(1, N)])
-        self.audio_queue = queue.Queue()
 
-        # Audio from operator/server -> robot
-        self.audio_to_robot_queue = queue.Queue()
 
-        # --- Live pose state (for WASDQE control) ---
         self.pose_lock = threading.Lock()
         self.current_pose = Movement(
             name="Current",
-            x=0, y=0, z=0,
-            roll=0, pitch=0, yaw=0,
             body_yaw=0,
             left_antenna=0,
             right_antenna=0,
             duration=0.2,
         )
 
-    def set_robot_connection(self, ws: WebSocket, loop: asyncio.AbstractEventLoop):
         self.robot_ws = ws
         self.robot_loop = loop
 
-    def update_frame(self, frame_bytes: bytes):
         with self.frame_lock:
             self.latest_frame_bytes = frame_bytes
             self.latest_frame_ts = time.time()
 
-    def push_audio(self, audio_bytes: bytes):
-        """
-        Pushes raw audio bytes.
-        If the queue is full (meaning we are lagging), throw away the OLDEST audio.
-        """
-        MAX_QUEUE_SIZE = 2  # keep latency low
-
-        while self.audio_queue.qsize() >= MAX_QUEUE_SIZE:
-            try:
-                print("Dropping oldest audio FROM robot, queue size is", self.audio_queue.qsize())
-                self.audio_queue.get_nowait()
-            except queue.Empty:
-                break
-
-        self.audio_queue.put((AUDIO_SAMPLE_RATE, audio_bytes))
 
-    def push_audio_to_robot(self, audio_bytes: bytes):
-        """
-        Audio coming FROM the operator/server, going TO the robot.
-        """
-        MAX_QUEUE_SIZE = 2
-        while self.audio_to_robot_queue.qsize() >= MAX_QUEUE_SIZE:
             try:
-                print("Dropping oldest audio TO robot, queue size is", self.audio_to_robot_queue.qsize())
-                self.audio_to_robot_queue.get_nowait()
             except queue.Empty:
                 break
 
-        self.audio_to_robot_queue.put(audio_bytes)
 
     def get_audio_to_robot_blocking(self) -> bytes:
-        """
-        Blocking get for the sender task in /audio_stream.
-        """
-        return self.audio_to_robot_queue.get()
 
     def get_connection_status(self) -> str:
-        if self.robot_ws:
-            return "✅ Robot Connected"
-        return "🔴 Waiting for Robot..."
 
     def update_pose(
         self,
@@ -165,10 +178,6 @@ class GlobalState:
         dyaw: float = 0,
         dbody_yaw: float = 0,
     ) -> Movement:
-        """
-        Apply a small delta to the current pose and return a new Movement.
-        This is what WASDQE will use.
-        """
         with self.pose_lock:
             p = self.current_pose
@@ -186,7 +195,7 @@ class GlobalState:
             duration=0.4,
         )
 
-        # Optional clamping (adjust ranges as you like)
         new.pitch = float(np.clip(new.pitch, -30, 30))
         new.yaw = float(np.clip(new.yaw, -180, 180))
         new.roll = float(np.clip(new.roll, -40, 40))
@@ -199,12 +208,15 @@ class GlobalState:
         return new
 
     def reset_pose(self) -> Movement:
-        """Back to neutral / home pose."""
        with self.pose_lock:
             self.current_pose = Movement(
                 name="Current",
-                x=0, y=0, z=0,
-                roll=0, pitch=0, yaw=0,
                 body_yaw=0,
                 left_antenna=0,
                 right_antenna=0,
@@ -213,31 +225,37 @@ class GlobalState:
         return self.current_pose
 
     def get_pose_text(self) -> str:
-        """Human-readable pose info to show in the UI."""
         with self.pose_lock:
             p = self.current_pose
             return (
-                f"Head position:\n"
                 f"  x={p.x:.1f}, y={p.y:.1f}, z={p.z:.1f}\n"
                 f"  roll={p.roll:.1f}, pitch={p.pitch:.1f}, yaw={p.yaw:.1f}\n"
-                f"Body:\n"
                 f"  body_yaw={p.body_yaw:.1f}"
             )
 
 state = GlobalState()
 
 def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
-    """
-    Convert Movement -> head pose + body_yaw payload and fire it to the robot.
-    Used by the WASDQE controls.
-    """
     if not (state.robot_ws and state.robot_loop):
         return state.get_pose_text(), "⚠️ Robot not connected"
 
     pose = create_head_pose(
-        x=mov.x, y=mov.y, z=mov.z,
-        roll=mov.roll, pitch=mov.pitch, yaw=mov.yaw,
-        degrees=True, mm=True,
     )
 
     payload = {
@@ -262,182 +280,88 @@ def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
 
     return state.get_pose_text(), f"✅ {msg}"
 
-# --- 3. Controller Logic ---
-class MovementManager:
-    def __init__(self):
-        self.queue: List[Movement] = []
-        self.is_playing = False
-        self.auto_play = True
-        self.playback_speed = 1.0
-        self.play_thread: Optional[threading.Thread] = None
-
-    def add_movement(self, mov: Movement):
-        self.queue.append(mov)
-        if self.auto_play and not self.is_playing:
-            self.play_queue()
-        return self.get_queue_text(), f"✅ Added {mov.name}"
-
-    def add_preset(self, name: str):
-        if name in PRESETS:
-            return self.add_movement(PRESETS[name])
-        return self.get_queue_text(), "❌ Unknown Preset"
-
-    def add_sequence(self, name: str):
-        if name in SEQUENCES:
-            for move_name in SEQUENCES[name]:
-                self.queue.append(PRESETS[move_name])
-            if self.auto_play and not self.is_playing:
-                self.play_queue()
-            return self.get_queue_text(), f"✅ Added Sequence: {name}"
-        return self.get_queue_text(), "❌ Unknown Sequence"
-
-    def clear_queue(self):
-        self.queue.clear()
-        self.is_playing = False
-        return self.get_queue_text(), "🗑️ Queue Cleared"
-
-    def remove_last(self):
-        if self.queue:
-            self.queue.pop()
-        return self.get_queue_text(), "🗑️ Removed Last"
-
-    def get_queue_text(self):
-        if not self.queue:
-            return "📋 Queue Empty"
-
-        lines = ["📋 Current Queue:"]
-        for i, m in enumerate(self.queue, 1):
-            indicator = "▶️" if (i==1 and self.is_playing) else " "
-            lines.append(f"{indicator} {i}. {m.name} ({m.duration}s)")
-
-        return "\n".join(lines)
-
-    def play_queue(self, speed=None):
-        if speed: self.playback_speed = speed
-        if self.is_playing: return self.get_queue_text(), "⚠️ Already Playing"
-        if not self.queue: return self.get_queue_text(), "⚠️ Queue Empty"
-
-        self.is_playing = True
-        self.play_thread = threading.Thread(target=self._worker, daemon=True)
-        self.play_thread.start()
-        return self.get_queue_text(), "▶️ Playing..."
-
-    def stop_playback(self):
-        self.is_playing = False
-        if self.play_thread:
-            self.play_thread.join(timeout=1.0)
-        return self.get_queue_text(), "⏹️ Stopped"
-
-    def _worker(self):
-        """Background thread that processes the queue."""
-        idx = 0
-        try:
-            while self.is_playing and idx < len(self.queue):
-                move = self.queue[idx]
-
-                # 1. Build Payload
-                pose = create_head_pose(
-                    x=move.x, y=move.y, z=move.z,
-                    roll=move.roll, pitch=move.pitch, yaw=move.yaw,
-                    degrees=True, mm=True
-                )
-
-                payload = {
-                    "type": "movement",
-                    "movement": {
-                        "head": pose.tolist(),
-                        "body_yaw": move.body_yaw,
-                        "duration": move.duration / self.playback_speed
-                    }
-                }
-
-                # Add antennas if specified
-                if move.left_antenna is not None and move.right_antenna is not None:
-                    payload["movement"]["antennas"] = [
-                        np.deg2rad(move.right_antenna),
-                        np.deg2rad(move.left_antenna)
-                    ]
-
-                # 2. Send to Robot (Async safe)
-                if state.robot_ws and state.robot_loop:
-                    asyncio.run_coroutine_threadsafe(
-                        state.robot_ws.send_json(payload),
-                        state.robot_loop
-                    )
-                else:
-                    print("⚠️ Robot not connected, skipping command.")
-
-                # 3. Wait for move to finish (blocking thread, not async loop)
-                time.sleep(move.duration / self.playback_speed)
-                idx += 1
 
-            # Loop finished
-            if not self.auto_play:
-                self.is_playing = False
-            else:
-                # In auto-play, we stay "playing" but wait for new items
-                self.is_playing = False
 
-        except Exception as e:
-            print(f"Playback Error: {e}")
-            self.is_playing = False
 
-    def generate_mjpeg_stream(self):
-        last_timestamp = 0.0
-
-        while True:
-            # 1. Check if frame has changed
-            with state.frame_lock:
-                current_bytes = state.latest_frame_bytes
-                current_timestamp = state.latest_frame_ts
-
-            # 2. Only yield if this is a new frame
-            if current_timestamp > last_timestamp:
-                last_timestamp = current_timestamp
-                if current_bytes is not None:
-                    yield (b'--frame\r\n'
-                           b'Content-Type: image/jpeg\r\n\r\n' + current_bytes + b'\r\n')
             else:
-                # If no new frame, sleep a bit longer to save CPU
-                time.sleep(0.02)
-                continue
-
-            # Cap FPS slightly to prevent saturation
-            time.sleep(0.02)
 
-manager = MovementManager()
 
-# --- 4. FastAPI & WebSocket Logic ---
 app = FastAPI()
 
 @app.websocket("/robot")
 async def robot_endpoint(ws: WebSocket):
-    """Endpoint for the Robot to connect to (Control Channel)."""
     await ws.accept()
     state.set_robot_connection(ws, asyncio.get_running_loop())
-    print("[System] Robot Connected!")
-
     try:
-        # Heartbeat loop
         while True:
-            # We wait for messages, but mostly we just hold the connection open
-            # and send commands via the state.robot_ws handle.
-            msg = await ws.receive()
-            if msg["type"] == "websocket.disconnect":
                 break
     except (WebSocketDisconnect, Exception):
         print("[System] Robot Disconnected")
     finally:
-        state.robot_ws = None
 
 @app.get("/video_feed")
 def video_feed():
     return StreamingResponse(
-        manager.generate_mjpeg_stream(),
-        media_type="multipart/x-mixed-replace; boundary=frame"
     )
 
 @app.websocket("/video_stream")
 async def stream_endpoint(ws: WebSocket):
     """Endpoint for Robot/Sim to send video frames."""
@@ -445,10 +369,15 @@ async def stream_endpoint(ws: WebSocket):
     try:
         while True:
             msg = await ws.receive()
-            if "bytes" in msg and msg["bytes"]:
-                state.update_frame(msg["bytes"])
     except Exception:
-        pass
 
 @app.websocket("/audio_stream")
 async def audio_endpoint(ws: WebSocket):
@@ -457,66 +386,69 @@ async def audio_endpoint(ws: WebSocket):
     print("[Audio] Stream Connected")
 
     async def robot_to_server():
-        """Robot mic -> server -> state.audio_queue (-> WebRTC -> browser)."""
         try:
             while True:
                 data = await ws.receive()
                 t = data.get("type")
-
                 if t == "websocket.disconnect":
                     print("[Audio] Disconnected (recv)")
                     break
 
                 if t == "websocket.receive":
                     if data.get("bytes"):
-                        # Audio FROM robot
-                        state.push_audio(data["bytes"])
                     elif data.get("text") == "ping":
                         print("[Audio] Received ping")
-                    else:
-                        print(f"[Audio] Received unknown message: {data}")
         except Exception as e:
             print(f"[Audio] robot_to_server error: {e}")
 
     async def server_to_robot():
-        """Server/operator audio -> robot speaker via WebSocket."""
         loop = asyncio.get_running_loop()
         try:
             while True:
                 chunk: bytes = await loop.run_in_executor(
                     None, state.get_audio_to_robot_blocking
                 )
-                if chunk is None:
-                    continue
-                await ws.send_bytes(chunk)
         except Exception as e:
             print(f"[Audio] server_to_robot error: {e}")
 
     try:
         await asyncio.gather(robot_to_server(), server_to_robot())
     finally:
         print("[Audio] Stream Closed")
 
 
-# --- 5. Gradio Interface ---
 
 class RobotAudioHandler(StreamHandler):
     """
     FastRTC handler that connects browser WebRTC audio to the robot.
 
-    - receive(): audio from browser mic -> state.audio_to_robot_queue (then /audio_stream sends it to robot)
-    - emit(): audio from state.audio_queue (filled by /audio_stream robot_to_server) -> browser playback
     """
 
     def __init__(self) -> None:
-        super().__init__(input_sample_rate=AUDIO_SAMPLE_RATE, output_sample_rate=AUDIO_SAMPLE_RATE)
 
-    def receive(self, frame: tuple[int, np.ndarray]) -> None:
-        """Called whenever the browser sends audio."""
         if frame is None:
             return
 
-        sr, array = frame
         if array is None:
             return
@@ -526,7 +458,6 @@ class RobotAudioHandler(StreamHandler):
         if arr.ndim > 1:
             arr = arr[0]
 
-        # Convert to int16 and then to bytes for the robot
        if arr.dtype != np.int16:
             if np.issubdtype(arr.dtype, np.floating):
                 arr = np.clip(arr, -1.0, 1.0)
@@ -537,382 +468,579 @@ class RobotAudioHandler(StreamHandler):
         state.push_audio_to_robot(arr.tobytes())
 
     def emit(self):
-        """
-        Called repeatedly by FastRTC to get audio to send to the browser.
-
-        Should return (sample_rate, np.ndarray[int16]) or None.
-        """
         try:
             sample_rate, frame_bytes = state.audio_queue.get(timeout=0.5)
             audio = np.frombuffer(frame_bytes, dtype=np.int16).reshape(1, -1)
             return sample_rate, audio
         except queue.Empty:
-            # No audio right now, tell FastRTC to skip sending
             return None
 
     def copy(self) -> "RobotAudioHandler":
-        """
-        FastRTC will call this when it needs a new handler for a new session.
-        The handler itself is stateless; it always looks at GlobalState.
-        """
         return RobotAudioHandler()
 
     def shutdown(self) -> None:
-        """Called on session shutdown. Nothing to clean up for now."""
         pass
 
     def start_up(self) -> None:
-        """Called on session startup. Nothing special to do."""
         pass
 
 
-def webrtc_audio_generator():
-    """
-    Generator for FastRTC.
-    """
-    # Clear old data to start fresh
-    with state.audio_queue.mutex:
-        state.audio_queue.queue.clear()
 
-    # OPTIMIZATION: Reduce target samples.
-    # 4096 samples @ 16kHz is 256ms of latency built-in!
-    # Try 1024 (64ms) or 512 (32ms) for lower latency.
-    TARGET_SAMPLES = 1024
-    byte_buffer = bytearray()
 
-    while True:
-        try:
-            # Wait up to 1 second for data. If no data, loop again.
-            # Do NOT use a short timeout combined with silence generation.
-            sample_rate, chunk_bytes = state.audio_queue.get(timeout=1.0)
-            if chunk_bytes:
-                byte_buffer.extend(chunk_bytes)
-        except queue.Empty:
-            # If we really have no data for a long time, just continue waiting.
-            # Do NOT yield silence here.
-            continue
 
-        # Only yield when we have enough data
-        while len(byte_buffer) >= TARGET_SAMPLES * 2:  # int16 = 2 bytes
-            read_size = TARGET_SAMPLES * 2
-            out_bytes = byte_buffer[:read_size]
-            byte_buffer = byte_buffer[read_size:]
 
-            audio_int16 = np.frombuffer(out_bytes, dtype=np.int16)
-            audio_int16 = audio_int16.reshape(1, -1)
 
-            yield (AUDIO_SAMPLE_RATE, audio_int16)
 
 
-def handle_operator_audio(sr: int, audio: np.ndarray):
-    """
-    Called continuously by FastRTC when the browser sends mic audio.
 
-    `audio` is expected to be shape (channels, samples) or (samples,)
-    with dtype int16 or float32, depending on FastRTC config.
-    """
-    if audio is None:
-        return
-
-    arr = np.asarray(audio)
-    # Ensure mono and int16
-    if arr.ndim > 1:
-        arr = arr[0]  # take first channel
-
-    if arr.dtype != np.int16:
-        # For float32 in [-1, 1]
-        if np.issubdtype(arr.dtype, np.floating):
-            arr = np.clip(arr, -1.0, 1.0)
-            arr = (arr * 32767.0).astype(np.int16)
-        else:
-            arr = arr.astype(np.int16)
 
-    state.push_audio_to_robot(arr.tobytes())
-    # No UI output
-    return
 
 
-def webrtc_video_generator():
-    """
-    Generator for FastRTC WebRTC (mode='receive', modality='video').
-    It reads JPEG bytes from state.latest_frame_bytes, decodes them with OpenCV,
-    and yields HxWx3 uint8 frames as expected by FastRTC.
-    """
-    last_ts = 0.0
-    frame = state.black_frame.copy()
 
-    while True:
-        with state.frame_lock:
-            ts = state.latest_frame_ts
-            frame_bytes = state.latest_frame_bytes
 
-        if ts > last_ts and frame_bytes:
-            last_ts = ts
-            np_bytes = np.frombuffer(frame_bytes, dtype=np.uint8)
-            frame = cv2.imdecode(np_bytes, cv2.IMREAD_COLOR)
-            if frame is None:
-                frame = state.black_frame.copy()
-        # Shape (H, W, 3), dtype uint8
-        yield frame
 
 
-NUDGE_POS = 5.0  # mm or arbitrary units
-NUDGE_HEIGHT = 5.0  # z
-NUDGE_ANGLE = 5.0  # degrees
-NUDGE_BODY = 0.3  # degrees for body_yaw
 
-def move_w():
-    """
-    W: Move "forward" (e.g. towards positive y or z depending on your convention).
-    Here: we'll go +z (raise head) as an example.
-    """
-    mov = state.update_pose(dpitch=-NUDGE_HEIGHT)
-    return send_pose_to_robot(mov, "W (forward/up)")
 
-def move_s():
-    mov = state.update_pose(dpitch=NUDGE_HEIGHT)
-    return send_pose_to_robot(mov, "S (back/down)")
 
-def move_a():
-    """
-    A: turn left -> head yaw left + body yaw left.
-    """
-    mov = state.update_pose(dyaw=NUDGE_ANGLE*2)
-    return send_pose_to_robot(mov, "A (turn left)")
 
-def move_d():
-    """
-    D: turn right -> head yaw right + body yaw right.
-    """
-    mov = state.update_pose(dyaw=-NUDGE_ANGLE*2)
-    return send_pose_to_robot(mov, "D (turn right)")
 
-def move_q():
-    """
-    Q: tilt head up (pitch negative if you follow your earlier convention).
-    """
-    mov = state.update_pose(droll=-NUDGE_ANGLE)
-    return send_pose_to_robot(mov, "Q (tilt up)")
 
-def move_e():
-    """
-    E: tilt head down (pitch positive).
-    """
-    mov = state.update_pose(droll=NUDGE_ANGLE)
-    return send_pose_to_robot(mov, "E (tilt down)")
 
-def move_body_left():
-    mov = state.update_pose(dbody_yaw=NUDGE_BODY)
-    return send_pose_to_robot(mov, "Body Left (<)")
 
-def move_body_right():
-    mov = state.update_pose(dbody_yaw=-NUDGE_BODY)
-    return send_pose_to_robot(mov, "Body Right (>)")
 
-def center_pose():
-    mov = state.reset_pose()
-    return send_pose_to_robot(mov, "Reset pose")
 
 
-with gr.Blocks(title="Reachy Controller", theme=gr.themes.Soft()) as demo:
-
-    gr.Markdown("## 🤖 Reachy Mini Controller")
 
     with gr.Row():
-        # --- LEFT COLUMN: Controls ---
         with gr.Column(scale=1):
-            status_box = gr.Textbox(label="System Status", value=state.get_connection_status, every=2)
-            pose_box = gr.Textbox(
-                label="Current Pose",
-                value=state.get_pose_text,
-                every=0.5,
-                lines=8,
             )
-            with gr.Group():
-                gr.Markdown("### 🎧 Audio Listen")
-
-                # Start button for the WebRTC stream
-                listen_btn = gr.Button("🔊 Start Listening", variant="secondary")
-
-                # FastRTC WebRTC component in receive mode, audio only
                 robot_audio = WebRTC(
-                    label="Robot Audio",
                     modality="audio",
                     mode="send-receive",
-                    rtc_configuration=get_cloudflare_turn_credentials(),
-                    server_rtc_configuration=get_cloudflare_turn_credentials(ttl=360_000),
-                    full_screen=False
                 )
-
-                # Use the handler directly, like in the FastRTC docs
                 robot_audio.stream(
                     fn=RobotAudioHandler(),
                     inputs=[robot_audio],
                     outputs=[robot_audio],
-                    time_limit=60,
                 )
-
-
-            # with gr.Group():
-            #     gr.Markdown("### 🎮 Playback")
-            #     auto_play = gr.Checkbox(label="Auto-play", value=True)
-            #     speed = gr.Slider(0.5, 2.0, 1.0, label="Speed")
-
-            #     with gr.Row():
-            #         play_btn = gr.Button("▶️ Play", variant="primary")
-            #         stop_btn = gr.Button("⏹️ Stop")
-
-            #     with gr.Row():
-            #         clear_btn = gr.Button("🗑️ Clear")
-            #         undo_btn = gr.Button("Undo")
-
-            #     queue_display = gr.Textbox(label="Queue", value=manager.get_queue_text, lines=10)
-
-            # --- Live movement control ---
-            with gr.Group():
-                gr.Markdown("### 🕹️ Keyboard Control (WASD + QE)")
-
-                # These buttons will be triggered by keyboard events via JS
-                btn_forward = gr.Button("Look up (W)", elem_id="btn-forward")
-                btn_back = gr.Button("Look down (S)", elem_id="btn-back")
-                btn_left = gr.Button("Left (A)", elem_id="btn-left")
-                btn_right = gr.Button("Right (D)", elem_id="btn-right")
-                btn_tilt_up = gr.Button("Tilt left (Q)", elem_id="btn-tilt-up")
-                btn_tilt_down = gr.Button("Tilt right (E)", elem_id="btn-tilt-down")
-                btn_body_left = gr.Button("Body Left (J)", elem_id="btn-body-left")
-                btn_body_right = gr.Button("Body Right (L)", elem_id="btn-body-right")
-                btn_center = gr.Button("Center (H)", elem_id="btn-center")
-
-                # Each button updates the pose_box text
-                btn_forward.click(move_w, outputs=[pose_box])
-                btn_back.click(move_s, outputs=[pose_box])
-                btn_left.click(move_a, outputs=[pose_box])
-                btn_right.click(move_d, outputs=[pose_box])
-                btn_tilt_up.click(move_q, outputs=[pose_box])
-                btn_tilt_down.click(move_e, outputs=[pose_box])
-                btn_body_left.click(move_body_left, outputs=[pose_box])
-                btn_body_right.click(move_body_right, outputs=[pose_box])
-                btn_center.click(center_pose, outputs=[pose_box])
-
-        # --- RIGHT COLUMN: View ---
-        with gr.Column(scale=2):
             robot_video = WebRTC(
-                label="Robot Video",
                 modality="video",
                 mode="receive",
-                rtc_configuration=get_cloudflare_turn_credentials(),
-                server_rtc_configuration=get_cloudflare_turn_credentials(ttl=360_000)
             )
             robot_video.stream(
-                fn=lambda: webrtc_video_generator(),
                 inputs=[],
                 outputs=[robot_video],
                 trigger=listen_btn.click,
             )
-        # html_code = """
-        # <html>
-        # <body>
-        # <img src="/video_feed" style="width: 100%; max-width: 1080px; border-radius: 8px;">
-        # </body>
-        # </html>
-        # """
-        # sim_view = gr.HTML(value=html_code, label="🎬 Robot Simulation")
-
-        # # --- Movement Builders ---
-        # with gr.Tabs():
-        #     with gr.Tab("✨ Presets & Sequences"):
-        #         gr.Markdown("### Quick Actions")
-        #         with gr.Row(variant="panel"):
-        #             for name in PRESETS:
-        #                 btn = gr.Button(name, size="sm")
-        #                 btn.click(manager.add_preset, inputs=[gr.State(name)], outputs=[queue_display, status_box])
-
-        #         gr.Markdown("### Sequences")
-        #         with gr.Row():
-        #             for seq in SEQUENCES:
-        #                 btn = gr.Button(f"🎬 {seq}", size="sm")
-        #                 btn.click(manager.add_sequence, inputs=[gr.State(seq)], outputs=[queue_display, status_box])
-
-        #     with gr.Tab("🛠️ Custom Move"):
-        #         with gr.Row():
-        #             c_x = gr.Slider(-50, 50, 0, label="X")
-        #             c_y = gr.Slider(-50, 50, 0, label="Y")
-        #             c_z = gr.Slider(-20, 50, 0, label="Z")
-        #         with gr.Row():
-        #             c_r = gr.Slider(-30, 30, 0, label="Roll")
-        #             c_p = gr.Slider(-30, 30, 0, label="Pitch")
-        #             c_y_aw = gr.Slider(-45, 45, 0, label="Yaw")
-        #         with gr.Row():
-        #             c_la = gr.Slider(-180, 180, 0, label="Left Ant")
-        #             c_ra = gr.Slider(-180, 180, 0, label="Right Ant")
-
-        #         c_dur = gr.Slider(0.1, 5.0, 1.0, label="Duration")
-        #         c_add = gr.Button("➕ Add Custom Move", variant="primary")
-
-        #         def _add_custom(x,y,z,r,p,yw,la,ra,d):
-        #             m = Movement("Custom", x,y,z,r,p,yw,la,ra,d)
-        #             return manager.add_movement(m)
-
-        #         c_add.click(_add_custom,
-        #                     inputs=[c_x, c_y, c_z, c_r, c_p, c_y_aw, c_la, c_ra, c_dur],
-        #                     outputs=[queue_display, status_box])
-
-    # --- Event Wiring ---
-    # auto_play.change(lambda x: setattr(manager, 'auto_play', x), inputs=[auto_play])
-    # play_btn.click(manager.play_queue, inputs=[speed], outputs=[queue_display, status_box])
-    # stop_btn.click(manager.stop_playback, outputs=[queue_display, status_box])
-    # clear_btn.click(manager.clear_queue, outputs=[queue_display, status_box])
-    # undo_btn.click(manager.remove_last, outputs=[queue_display, status_box])
-
-    demo.load(
-        None,
-        None,
-        None,
-        js="""
-        () => {
-            const keyMap = {
-                'w': 'btn-forward',
-                's': 'btn-back',
-                'a': 'btn-left',
-                'd': 'btn-right',
-                'q': 'btn-tilt-up',
-                'e': 'btn-tilt-down',
-                'h': 'btn-center',
-                'j': 'btn-body-left',
-                'l': 'btn-body-right',
-            };
-
-            let lastPressed = {};
-            const REPEAT_MS = 120; // minimum time between repeated presses
-
-            document.addEventListener('keydown', (ev) => {
-                const key = ev.key.toLowerCase();
-                const id = keyMap[key];
-                if (!id) return;
-
-                const now = Date.now();
-                if (lastPressed[key] && now - lastPressed[key] < REPEAT_MS) {
-                    return; // simple debounce
-                }
-                lastPressed[key] = now;
-
-                // Prevent page scrolling with space, etc
-                ev.preventDefault();
-
-                const btn = document.getElementById(id);
-                if (btn) {
-                    btn.click();
-                }
-            });
-
-            console.log('Keyboard control ready: WASD for x/y, Q/E for pitch, J/L for body yaw, H for center');
-        }
-        """,
-    )
 
-# --- 6. Mount & Run ---
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
     print("🚀 Server starting on http://0.0.0.0:7860")
-    print("ℹ️ Point your Robot/Sim to: ws://<YOUR_PC_IP>:7860/robot")
     uvicorn.run(app, host="0.0.0.0", port=7860, proxy_headers=True, forwarded_allow_ips="*")

 """
+Reachy Mini Controller
 A centralized server that listens for Robot connections and hosts a Gradio control interface.
 """
 
 import asyncio
 import threading
 import time
 import queue
 from dataclasses import dataclass
+from typing import Optional, Tuple
 
 import cv2
 import gradio as gr
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 from fastapi.responses import StreamingResponse
 import uvicorn
+from fastrtc import WebRTC, StreamHandler, get_cloudflare_turn_credentials
 
+from reachy_mini.utils import create_head_pose
+
+# -------------------------------------------------------------------
+# 1. Configuration
+# -------------------------------------------------------------------
 
 AUDIO_SAMPLE_RATE = 16000  # respeaker samplerate
 
+# Audio queue configuration
+MAX_AUDIO_QUEUE_SIZE = 2
+
+# Movement step sizes
+NUDGE_ANGLE = 5.0  # degrees for head roll / yaw
+NUDGE_BODY = 0.3  # degrees for body_yaw
+NUDGE_PITCH = 5.0  # degrees for pitch
+
+# Video loop timing
+FRAME_SLEEP_S = 0.02
+
+# TURN config
+TURN_TTL_SERVER_MS = 360_000
+
+USE_VIDEO_WEBRTC = True
+
+turn_credentials = None  # get_cloudflare_turn_credentials()
+server_turn_credentials = None  # get_cloudflare_turn_credentials(ttl=TURN_TTL_SERVER_MS)
 
 
+# -------------------------------------------------------------------
+# 2. Data Models
+# -------------------------------------------------------------------
+
 @dataclass
 class Movement:
     name: str
 
     right_antenna: Optional[float] = None
     duration: float = 1.0
 
+# -------------------------------------------------------------------
+# 3. Global State
+# -------------------------------------------------------------------
 
 class GlobalState:
     """
     Singleton-style class to manage shared state between FastAPI (WebSockets)
+    and Gradio (UI).
     """
     def __init__(self):
         # Connection handles
         self.robot_ws: Optional[WebSocket] = None
         self.robot_loop: Optional[asyncio.AbstractEventLoop] = None
+
         # Video Stream Data
         self.frame_lock = threading.Lock()
         self.black_frame = np.zeros((640, 640, 3), dtype=np.uint8)
+        _, buffer = cv2.imencode(".jpg", self.black_frame)
         self.latest_frame_bytes = buffer.tobytes()
         self.latest_frame_ts = time.time()
 
         # Audio from robot -> browser
+        # Queue of (sample_rate: int, audio_bytes: bytes)
+        self.audio_queue: "queue.Queue[Tuple[int, bytes]]" = queue.Queue()
 
+        # Audio from operator -> robot
+        self.audio_to_robot_queue: "queue.Queue[bytes]" = queue.Queue()
 
+        # Live pose state
         self.pose_lock = threading.Lock()
         self.current_pose = Movement(
             name="Current",
+            x=0,
+            y=0,
+            z=0,
+            roll=0,
+            pitch=0,
+            yaw=0,
            body_yaw=0,
             left_antenna=0,
             right_antenna=0,
             duration=0.2,
         )
 
+    # --- Connection management ---
 
+    def set_robot_connection(self, ws: WebSocket, loop: asyncio.AbstractEventLoop) -> None:
         self.robot_ws = ws
         self.robot_loop = loop
 
+    def clear_robot_connection(self) -> None:
+        self.robot_ws = None
+        self.robot_loop = None
+
+    # --- Video ---
+
+    def update_frame(self, frame_bytes: bytes) -> None:
         with self.frame_lock:
             self.latest_frame_bytes = frame_bytes
             self.latest_frame_ts = time.time()
 
+    # --- Audio queues ---
 
+    @staticmethod
+    def _push_bounded(q: queue.Queue, item, max_size: int, description: str) -> None:
+        while q.qsize() >= max_size:
             try:
+                dropped = q.get_nowait()
+                del dropped
+                print(f"Dropping oldest audio {description}, queue size is {q.qsize()}")
             except queue.Empty:
                 break
+        q.put(item)
+
+    def push_audio_from_robot(self, audio_bytes: bytes) -> None:
+        self._push_bounded(
+            self.audio_queue,
+            (AUDIO_SAMPLE_RATE, audio_bytes),
+            MAX_AUDIO_QUEUE_SIZE,
+            "FROM robot",
+        )
 
+    def push_audio_to_robot(self, audio_bytes: bytes) -> None:
+        self._push_bounded(
+            self.audio_to_robot_queue,
+            audio_bytes,
+            MAX_AUDIO_QUEUE_SIZE,
+            "TO robot",
+        )
 
     def get_audio_to_robot_blocking(self) -> bytes:
+        try:
+            return self.audio_to_robot_queue.get(timeout=0.2)
+        except queue.Empty:
+            return None
+
+    # --- Status ---
 
     def get_connection_status(self) -> str:
+        return "✅ Robot Connected" if self.robot_ws else "🔴 Waiting for Robot..."
+
+    # --- Pose management ---
 
     def update_pose(
         self,
 
         dyaw: float = 0,
         dbody_yaw: float = 0,
     ) -> Movement:
         with self.pose_lock:
             p = self.current_pose
 
             duration=0.4,
         )
 
+        # Clamp pose values
         new.pitch = float(np.clip(new.pitch, -30, 30))
         new.yaw = float(np.clip(new.yaw, -180, 180))
         new.roll = float(np.clip(new.roll, -40, 40))
 
         return new
 
     def reset_pose(self) -> Movement:
         with self.pose_lock:
             self.current_pose = Movement(
                 name="Current",
+                x=0,
+                y=0,
+                z=0,
+                roll=0,
+                pitch=0,
+                yaw=0,
                 body_yaw=0,
                 left_antenna=0,
                 right_antenna=0,
 
         return self.current_pose
 
     def get_pose_text(self) -> str:
         with self.pose_lock:
             p = self.current_pose
             return (
+                "Head position:\n"
                 f"  x={p.x:.1f}, y={p.y:.1f}, z={p.z:.1f}\n"
                 f"  roll={p.roll:.1f}, pitch={p.pitch:.1f}, yaw={p.yaw:.1f}\n"
+                "Body:\n"
                 f"  body_yaw={p.body_yaw:.1f}"
             )
 
+
 state = GlobalState()
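For reference, a minimal sketch (not part of the commit) of the drop-oldest policy that `_push_bounded` implements — with `MAX_AUDIO_QUEUE_SIZE = 2`, a third push evicts the oldest chunk so robot-to-browser latency stays bounded instead of growing:

    import queue

    q = queue.Queue()
    for chunk in (b"a", b"b", b"c"):
        while q.qsize() >= 2:       # same policy as GlobalState._push_bounded
            try:
                q.get_nowait()      # drop the oldest item instead of blocking
            except queue.Empty:
                break
        q.put(chunk)

    print(list(q.queue))  # [b'b', b'c'] -- b'a' was dropped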
 
+
+# -------------------------------------------------------------------
+# 4. Robot commands
+# -------------------------------------------------------------------
+
 def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
     if not (state.robot_ws and state.robot_loop):
         return state.get_pose_text(), "⚠️ Robot not connected"
 
     pose = create_head_pose(
+        x=mov.x,
+        y=mov.y,
+        z=mov.z,
+        roll=mov.roll,
+        pitch=mov.pitch,
+        yaw=mov.yaw,
+        degrees=True,
+        mm=True,
    )
 
     payload = {
 
     return state.get_pose_text(), f"✅ {msg}"
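The payload body is unchanged and therefore elided by the diff viewer. Judging from the removed `MovementManager._worker` above, the message sent over the /robot socket plausibly has this shape (sketch; field names taken from that removed code, duration/antennas handling assumed):

    payload = {
        "type": "movement",
        "movement": {
            "head": pose.tolist(),      # head pose from create_head_pose
            "body_yaw": mov.body_yaw,
            "duration": mov.duration,
            # optional: "antennas": [right_rad, left_rad], in radians
        },
    }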
 
+# -------------------------------------------------------------------
+# 5. Video streaming helpers
+# -------------------------------------------------------------------
 
+def generate_mjpeg_stream():
+    last_timestamp = 0.0
+    while True:
+        with state.frame_lock:
+            current_bytes = state.latest_frame_bytes
+            current_timestamp = state.latest_frame_ts
+
+        if current_timestamp > last_timestamp and current_bytes is not None:
+            last_timestamp = current_timestamp
+            yield (
+                b"--frame\r\n"
+                b"Content-Type: image/jpeg\r\n\r\n" + current_bytes + b"\r\n"
+            )
+        else:
+            time.sleep(FRAME_SLEEP_S)
+            continue
+
+        time.sleep(FRAME_SLEEP_S)
+
+
+def webrtc_video_generator():
+    """
+    Generator for FastRTC WebRTC (mode='receive', modality='video').
+    """
+    last_ts = 0.0
+    frame = state.black_frame.copy()
+
+    while True:
+        with state.frame_lock:
+            ts = state.latest_frame_ts
+            frame_bytes = state.latest_frame_bytes
+
+        if ts > last_ts and frame_bytes:
+            last_ts = ts
+            np_bytes = np.frombuffer(frame_bytes, dtype=np.uint8)
+            decoded = cv2.imdecode(np_bytes, cv2.IMREAD_COLOR)
+            if decoded is not None:
+                frame = decoded
             else:
+                frame = state.black_frame.copy()
+
+        yield frame
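The MJPEG fallback can be checked without a browser; OpenCV's FFmpeg backend can usually read a multipart MJPEG stream directly (sketch, assuming the server runs locally on port 7860 — support depends on the OpenCV build):

    import cv2

    cap = cv2.VideoCapture("http://localhost:7860/video_feed")
    ok, frame = cap.read()  # frame is an HxWx3 BGR array when ok is True
    print(ok, None if frame is None else frame.shape)
    cap.release()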
 
 
 
 
+# -------------------------------------------------------------------
+# 6. FastAPI endpoints
+# -------------------------------------------------------------------
 
 app = FastAPI()
 
+
 @app.websocket("/robot")
 async def robot_endpoint(ws: WebSocket):
+    """Endpoint for the Robot to connect to (control channel)."""
     await ws.accept()
     state.set_robot_connection(ws, asyncio.get_running_loop())
+    print("[System] Robot Connected")
+
     try:
         while True:
+            msg = await ws.receive()
+            if msg.get("type") == "websocket.disconnect":
                 break
     except (WebSocketDisconnect, Exception):
         print("[System] Robot Disconnected")
     finally:
+        state.clear_robot_connection()
+
 
 @app.get("/video_feed")
 def video_feed():
     return StreamingResponse(
+        generate_mjpeg_stream(),
+        media_type="multipart/x-mixed-replace; boundary=frame",
     )
 
+
 @app.websocket("/video_stream")
 async def stream_endpoint(ws: WebSocket):
     """Endpoint for Robot/Sim to send video frames."""
 
     try:
         while True:
             msg = await ws.receive()
+            data = msg.get("bytes")
+            if data:
+                state.update_frame(data)
+    except asyncio.CancelledError:
+        print("[Video] stream_endpoint cancelled")
     except Exception:
+        print("[Video] stream_endpoint closed")
+    finally:
+        print("[Video] stream_endpoint closed")
 
 @app.websocket("/audio_stream")
 async def audio_endpoint(ws: WebSocket):
 
     print("[Audio] Stream Connected")
 
     async def robot_to_server():
         try:
             while True:
                 data = await ws.receive()
                 t = data.get("type")
                 if t == "websocket.disconnect":
                     print("[Audio] Disconnected (recv)")
                     break
 
                 if t == "websocket.receive":
                     if data.get("bytes"):
+                        state.push_audio_from_robot(data["bytes"])
                     elif data.get("text") == "ping":
                         print("[Audio] Received ping")
+        except asyncio.CancelledError:
+            print("[Audio] robot_to_server cancelled")
         except Exception as e:
             print(f"[Audio] robot_to_server error: {e}")
 
     async def server_to_robot():
         loop = asyncio.get_running_loop()
         try:
             while True:
                 chunk: bytes = await loop.run_in_executor(
                     None, state.get_audio_to_robot_blocking
                 )
+                if chunk is not None:
+                    await ws.send_bytes(chunk)
+        except asyncio.CancelledError:
+            print("[Audio] server_to_robot cancelled")
         except Exception as e:
             print(f"[Audio] server_to_robot error: {e}")
 
     try:
         await asyncio.gather(robot_to_server(), server_to_robot())
+    except asyncio.CancelledError:
+        print("[Audio] audio_endpoint cancelled")
     finally:
         print("[Audio] Stream Closed")
 
 
+# -------------------------------------------------------------------
+# 7. FastRTC audio handler
+# -------------------------------------------------------------------
 
 class RobotAudioHandler(StreamHandler):
     """
     FastRTC handler that connects browser WebRTC audio to the robot.
 
+    - receive(): browser mic -> state.audio_to_robot_queue -> /audio_stream -> robot
+    - emit(): state.audio_queue (robot) -> browser playback
     """
 
     def __init__(self) -> None:
+        super().__init__(
+            input_sample_rate=AUDIO_SAMPLE_RATE,
+            output_sample_rate=AUDIO_SAMPLE_RATE,
+        )
 
+    def receive(self, frame: Tuple[int, np.ndarray]) -> None:
         if frame is None:
             return
 
+        sample_rate, array = frame
         if array is None:
             return
 
         if arr.ndim > 1:
             arr = arr[0]
 
         if arr.dtype != np.int16:
             if np.issubdtype(arr.dtype, np.floating):
                 arr = np.clip(arr, -1.0, 1.0)
 
         state.push_audio_to_robot(arr.tobytes())
 
     def emit(self):
         try:
             sample_rate, frame_bytes = state.audio_queue.get(timeout=0.5)
             audio = np.frombuffer(frame_bytes, dtype=np.int16).reshape(1, -1)
             return sample_rate, audio
         except queue.Empty:
             return None
 
     def copy(self) -> "RobotAudioHandler":
         return RobotAudioHandler()
 
     def shutdown(self) -> None:
         pass
 
     def start_up(self) -> None:
         pass
 
 
+# -------------------------------------------------------------------
+# 8. Movement UI helpers
+# -------------------------------------------------------------------
 
+def get_pose_string():
+    """Returns the pose in a format the JS can parse: pitch:X,yaw:Y,roll:Z,body:B"""
+    with state.pose_lock:
+        p = state.current_pose
+        return f"pitch:{p.pitch:.1f},yaw:{p.yaw:.1f},roll:{p.roll:.1f},body:{p.body_yaw:.1f}"
 
+def nudge_pose(dpitch=0, dyaw=0, droll=0, dbody_yaw=0, label="Move"):
+    """Apply a delta to the current pose, send it to the robot, and return the pose string."""
+    mov = state.update_pose(
+        dpitch=dpitch,
+        dyaw=dyaw,
+        droll=droll,
+        dbody_yaw=dbody_yaw,
+    )
+    send_pose_to_robot(mov, label)
+    return get_pose_string()
 
+def center_pose():
+    """Reset to the neutral pose and return the pose string."""
+    mov = state.reset_pose()
+    send_pose_to_robot(mov, "Reset pose")
+    return get_pose_string()
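The pose string is the contract between `get_pose_string` and the gauge-updating JS below; a quick round-trip check in Python (sketch, mirroring the JS regex):

    import re

    text = "pitch:-5.0,yaw:10.0,roll:0.0,body:0.3"
    m = re.match(r"pitch:([\d.-]+),yaw:([\d.-]+),roll:([\d.-]+),body:([\d.-]+)", text)
    pitch, yaw, roll, body = (float(g) for g in m.groups())
    assert (pitch, yaw, roll, body) == (-5.0, 10.0, 0.0, 0.3)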
 
 
+# -------------------------------------------------------------------
+# 9. Gradio UI
+# -------------------------------------------------------------------
 
+CUSTOM_CSS = """
+/* Dark theme overrides */
+.gradio-container {
+    background: linear-gradient(135deg, #0a0a0f 0%, #121218 100%) !important;
+    min-height: 100vh;
+}
+.dark {
+    --background-fill-primary: #12121a !important;
+    --background-fill-secondary: #1a1a24 !important;
+    --border-color-primary: #2a2a3a !important;
+    --text-color-subdued: #888 !important;
+}
+
+/* Header styling */
+#header-row {
+    background: transparent !important;
+    border: none !important;
+    margin-bottom: 1rem;
+    display: flex !important;
+    justify-content: space-between !important;
+    align-items: center !important;
+}
+#app-title {
+    font-size: 1.5rem !important;
+    font-weight: 600 !important;
+    background: linear-gradient(90deg, #fff, #888) !important;
+    -webkit-background-clip: text !important;
+    -webkit-text-fill-color: transparent !important;
+    border: none !important;
+    padding: 0 !important;
+    margin: 0 !important;
+}
+
+/* Status badge */
+#status-box {
+    flex-shrink: 0 !important;
+    width: auto !important;
+    max-width: 200px !important;
+    min-width: 160px !important;
+    background: rgba(16, 185, 129, 0.15) !important;
+    border: 1px solid rgba(16, 185, 129, 0.4) !important;
+    border-radius: 9999px !important;
+    padding: 0.4rem 1rem !important;
+    font-size: 0.875rem !important;
+}
+#status-box textarea {
+    background: transparent !important;
+    border: none !important;
+    color: #10b981 !important;
+    text-align: center !important;
+    font-weight: 500 !important;
+    padding: 0 !important;
+    min-height: unset !important;
+    height: auto !important;
+    line-height: 1.4 !important;
+}
+
+/* Video panel */
+#video-column {
+    background: #0f0f14 !important;
+    border-radius: 1rem !important;
+    border: 1px solid #2a2a3a !important;
+    overflow: hidden !important;
+    min-height: 500px !important;
+}
+#robot-video {
+    border-radius: 0.75rem !important;
+    overflow: hidden !important;
+}
+
+/* Control panel cards */
+.control-card {
+    background: rgba(26, 26, 36, 0.8) !important;
+    border: 1px solid #2a2a3a !important;
+    border-radius: 0.75rem !important;
+    padding: 1rem !important;
+}
+
+/* Audio section */
+#audio-section {
+    background: rgba(26, 26, 36, 0.8) !important;
+    border: 1px solid #2a2a3a !important;
+    border-radius: 0.75rem !important;
+}
+#listen-btn {
+    background: rgba(139, 92, 246, 0.2) !important;
+    border: 1px solid rgba(139, 92, 246, 0.3) !important;
+    color: #a78bfa !important;
+    border-radius: 0.5rem !important;
+    transition: all 0.2s !important;
+}
+#listen-btn:hover {
+    background: rgba(139, 92, 246, 0.3) !important;
+}
+
+/* Hide the default keyboard buttons */
+#keyboard-buttons {
+    display: none !important;
+}
+
+/* Quick action buttons */
+.quick-btn {
+    background: #1f1f2e !important;
+    border: 1px solid #2a2a3a !important;
+    border-radius: 0.5rem !important;
+    padding: 0.5rem !important;
+    font-size: 0.75rem !important;
+    transition: all 0.2s !important;
+}
+.quick-btn:hover {
+    background: #2a2a3a !important;
+}
+
+/* Keyboard visualization container */
+#keyboard-viz {
+    position: fixed;
+    bottom: 3.5rem;
+    right: 2rem;
+    z-index: 1000;
+    pointer-events: none;
+}
+
+/* Gauges container */
+#gauges-viz {
+    position: fixed;
+    bottom: 3.5rem;
+    left: 2rem;
+    z-index: 1000;
+    pointer-events: none;
+}
+
+/* Hide Gradio footer or make room for it */
+footer {
+    opacity: 0.5;
+}
+
+/* Hidden pose state (keep in DOM for JS) */
+#pose-state {
+    position: absolute !important;
+    opacity: 0 !important;
+    pointer-events: none !important;
+    height: 0 !important;
+    overflow: hidden !important;
+}
+"""
 
+KEYBOARD_VIZ_HTML = """
+<div id="keyboard-viz">
+  <div style="
+      background: rgba(0,0,0,0.75);
+      backdrop-filter: blur(12px);
+      border: 1px solid rgba(255,255,255,0.15);
+      border-radius: 16px;
+      padding: 16px;
+      display: flex;
+      flex-direction: column;
+      gap: 6px;
+      align-items: center;
+  ">
+    <div style="display: flex; gap: 6px;">
+      <div class="key" data-key="q">Q</div>
+      <div class="key" data-key="w">W</div>
+      <div class="key" data-key="e">E</div>
+    </div>
+    <div style="display: flex; gap: 6px;">
+      <div class="key" data-key="a">A</div>
+      <div class="key" data-key="s">S</div>
+      <div class="key" data-key="d">D</div>
+    </div>
+    <div style="display: flex; gap: 6px; margin-top: 6px;">
+      <div class="key" data-key="j">J</div>
+      <div class="key" data-key="h">H</div>
+      <div class="key" data-key="l">L</div>
+    </div>
+  </div>
+</div>
+
+<style>
+.key {
+    width: 48px;
+    height: 48px;
+    background: linear-gradient(180deg, #3a3a4a 0%, #2a2a3a 100%);
+    border: 1px solid #4a4a5a;
+    border-radius: 8px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    font-size: 14px;
+    font-weight: 600;
+    color: #ccc;
+    font-family: system-ui, sans-serif;
+    transition: all 0.1s;
+}
+.key.active {
+    background: linear-gradient(180deg, #8b5cf6 0%, #7c3aed 100%);
+    border-color: #a78bfa;
+    color: white;
+    box-shadow: 0 0 16px rgba(139, 92, 246, 0.6);
+    transform: scale(0.95);
+}
+</style>
+"""
 
+GAUGES_HTML = """
+<div id="gauges-viz">
+  <div style="
+      background: rgba(0,0,0,0.75);
+      backdrop-filter: blur(12px);
+      border: 1px solid rgba(255,255,255,0.15);
+      border-radius: 16px;
+      padding: 16px 20px;
+      display: flex;
+      gap: 24px;
+  ">
+    <div class="gauge-container">
+      <svg width="72" height="48" viewBox="0 0 72 48">
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#8b5cf6" stroke-width="5" stroke-linecap="round"
+              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="pitch"/>
+        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
+              class="gauge-needle" data-gauge="pitch" transform="rotate(0, 36, 42)"/>
+        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
+      </svg>
+      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Pitch</div>
+      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="pitch">0.0°</div>
+    </div>
+    <div class="gauge-container">
+      <svg width="72" height="48" viewBox="0 0 72 48">
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#06b6d4" stroke-width="5" stroke-linecap="round"
+              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="yaw"/>
+        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
+              class="gauge-needle" data-gauge="yaw" transform="rotate(0, 36, 42)"/>
+        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
+      </svg>
+      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Yaw</div>
+      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="yaw">0.0°</div>
+    </div>
+    <div class="gauge-container">
+      <svg width="72" height="48" viewBox="0 0 72 48">
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#f59e0b" stroke-width="5" stroke-linecap="round"
+              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="roll"/>
+        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
+              class="gauge-needle" data-gauge="roll" transform="rotate(0, 36, 42)"/>
+        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
+      </svg>
+      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Roll</div>
+      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="roll">0.0°</div>
+    </div>
+    <div style="width: 1px; background: #3a3a4a; margin: 0 4px;"></div>
+    <div class="gauge-container">
+      <svg width="72" height="48" viewBox="0 0 72 48">
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#ec4899" stroke-width="5" stroke-linecap="round"
+              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="body"/>
+        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
+              class="gauge-needle" data-gauge="body" transform="rotate(0, 36, 42)"/>
+        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
+      </svg>
+      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Body</div>
+      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="body">0.0°</div>
+    </div>
+  </div>
+</div>
+"""
 
+KEYBOARD_JS = """
+() => {
+    const keyMap = {
+        'w': 'w', 's': 's', 'a': 'a', 'd': 'd',
+        'q': 'q', 'e': 'e', 'h': 'h', 'j': 'j', 'l': 'l',
+    };
+    const btnMap = {
+        'w': 'btn-forward', 's': 'btn-back', 'a': 'btn-left', 'd': 'btn-right',
+        'q': 'btn-tilt-up', 'e': 'btn-tilt-down', 'h': 'btn-center',
+        'j': 'btn-body-left', 'l': 'btn-body-right',
+    };
+
+    let lastPressed = {};
+    const REPEAT_MS = 120;
+
+    document.addEventListener('keydown', (ev) => {
+        const key = ev.key.toLowerCase();
+        if (!keyMap[key]) return;
+
+        // Visual feedback
+        const keyEl = document.querySelector(`.key[data-key="${key}"]`);
+        if (keyEl) keyEl.classList.add('active');
+
+        // Rate limit and trigger button
+        const now = Date.now();
+        if (lastPressed[key] && now - lastPressed[key] < REPEAT_MS) return;
+        lastPressed[key] = now;
+        ev.preventDefault();
+
+        const btn = document.getElementById(btnMap[key]);
+        if (btn) btn.click();
+    });
+
+    document.addEventListener('keyup', (ev) => {
+        const key = ev.key.toLowerCase();
+        const keyEl = document.querySelector(`.key[data-key="${key}"]`);
+        if (keyEl) keyEl.classList.remove('active');
+    });
+
+    // Watch pose-state textbox for changes and update gauges
+    const updateGaugesFromState = () => {
+        const poseEl = document.querySelector('#pose-state textarea');
+        if (!poseEl) return;
+
+        const text = poseEl.value;
+        // Parse: "pitch:0.0,yaw:0.0,roll:0.0,body:0.0"
+        const match = text.match(/pitch:([\\d.-]+),yaw:([\\d.-]+),roll:([\\d.-]+),body:([\\d.-]+)/);
+        if (!match) return;
+
+        const pitch = parseFloat(match[1]);
+        const yaw = parseFloat(match[2]);
+        const roll = parseFloat(match[3]);
+        const body = parseFloat(match[4]);
+
+        const gauges = { pitch: [-30, 30], yaw: [-180, 180], roll: [-40, 40], body: [-3, 3] };
+        const values = { pitch, yaw, roll, body };
+
+        Object.entries(gauges).forEach(([name, [min, max]]) => {
+            const value = values[name];
+            const normalized = (value - min) / (max - min);
+            const angle = (normalized - 0.5) * 180;
+
+            const needle = document.querySelector(`.gauge-needle[data-gauge="${name}"]`);
+            if (needle) needle.setAttribute('transform', `rotate(${angle}, 36, 42)`);
+
+            const display = document.querySelector(`.gauge-value[data-gauge="${name}"]`);
+            if (display) display.textContent = value.toFixed(1) + '°';
+        });
+    };
+
+    // Poll for pose updates every 100ms
+    setInterval(updateGaugesFromState, 100);
+
+    // Update status box styling based on connection state
+    const updateStatusStyle = () => {
+        const statusBox = document.querySelector('#status-box');
+        if (!statusBox) return;
+        const textarea = statusBox.querySelector('textarea');
+        if (!textarea) return;
+
+        const isConnected = textarea.value.includes('Connected');
+        if (isConnected) {
+            statusBox.style.background = 'rgba(16, 185, 129, 0.15)';
+            statusBox.style.borderColor = 'rgba(16, 185, 129, 0.4)';
+            textarea.style.color = '#10b981';
+        } else {
+            statusBox.style.background = 'rgba(239, 68, 68, 0.15)';
+            statusBox.style.borderColor = 'rgba(239, 68, 68, 0.4)';
+            textarea.style.color = '#ef4444';
+        }
+    };
+    setInterval(updateStatusStyle, 500);
+
+    console.log('🎮 Keyboard controls ready');
+}
+"""
 
+# -------------------------------------------------------------------
+# Gradio UI with new styling
+# -------------------------------------------------------------------
+
+with gr.Blocks(
+    title="Reachy Controller",
+    theme=gr.themes.Base(
+        primary_hue="violet",
+        neutral_hue="slate",
+    ),
+    css=CUSTOM_CSS,
+) as demo:
 
+    # Header
+    with gr.Row(elem_id="header-row"):
+        gr.Markdown("## 🤖 Reachy Mini", elem_id="app-title")
+        status_box = gr.Textbox(
+            value=state.get_connection_status,
+            every=2,
+            show_label=False,
+            container=False,
+            elem_id="status-box",
+        )
+
     with gr.Row():
+        # Left column - Controls
         with gr.Column(scale=1):
+            # Hidden pose state textbox - polls pose for JS gauges
+            pose_state = gr.Textbox(
+                value=get_pose_string,
+                every=0.2,
+                show_label=False,
+                container=False,
+                elem_id="pose-state",
             )
+
+            # Audio section
+            with gr.Group(elem_id="audio-section"):
+                gr.Markdown("### 🎧 Audio")
+                listen_btn = gr.Button("🎤 Start Listening", elem_id="listen-btn")
                 robot_audio = WebRTC(
+                    label="",
                     modality="audio",
                     mode="send-receive",
+                    rtc_configuration=turn_credentials,
+                    server_rtc_configuration=server_turn_credentials,
+                    full_screen=False,
                 )
                 robot_audio.stream(
                     fn=RobotAudioHandler(),
                     inputs=[robot_audio],
                     outputs=[robot_audio],
                 )
+
+            # Quick actions
+            with gr.Group(elem_classes="control-card"):
+                gr.Markdown("### Quick Actions")
+                with gr.Row():
+                    btn_center_quick = gr.Button("🏠 Center", elem_classes="quick-btn")
+                    btn_look_up = gr.Button("👀 Look Up", elem_classes="quick-btn")
+                with gr.Row():
+                    btn_curious = gr.Button("🎭 Curious", elem_classes="quick-btn")
+                    btn_excited = gr.Button("🎉 Excited", elem_classes="quick-btn")
+
+            # Hidden keyboard buttons (still needed for JS clicks)
+            with gr.Group(elem_id="keyboard-buttons"):
+                btn_forward = gr.Button("W", elem_id="btn-forward")
+                btn_back = gr.Button("S", elem_id="btn-back")
+                btn_left = gr.Button("A", elem_id="btn-left")
+                btn_right = gr.Button("D", elem_id="btn-right")
+                btn_tilt_up = gr.Button("Q", elem_id="btn-tilt-up")
+                btn_tilt_down = gr.Button("E", elem_id="btn-tilt-down")
+                btn_body_left = gr.Button("J", elem_id="btn-body-left")
+                btn_body_right = gr.Button("L", elem_id="btn-body-right")
+                btn_center = gr.Button("H", elem_id="btn-center")
+
+            # Wire up hidden buttons - outputs required for Gradio to execute!
+            btn_forward.click(
+                lambda: nudge_pose(dpitch=-NUDGE_PITCH, label="W"),
+                outputs=[pose_state],
+            )
+            btn_back.click(
+                lambda: nudge_pose(dpitch=NUDGE_PITCH, label="S"),
+                outputs=[pose_state],
+            )
+            btn_left.click(
+                lambda: nudge_pose(dyaw=NUDGE_ANGLE * 2, label="A"),
+                outputs=[pose_state],
+            )
+            btn_right.click(
+                lambda: nudge_pose(dyaw=-NUDGE_ANGLE * 2, label="D"),
+                outputs=[pose_state],
+            )
+            btn_tilt_up.click(
+                lambda: nudge_pose(droll=-NUDGE_ANGLE, label="Q"),
+                outputs=[pose_state],
+            )
+            btn_tilt_down.click(
+                lambda: nudge_pose(droll=NUDGE_ANGLE, label="E"),
+                outputs=[pose_state],
+            )
+            btn_body_left.click(
+                lambda: nudge_pose(dbody_yaw=NUDGE_BODY, label="J"),
+                outputs=[pose_state],
+            )
+            btn_body_right.click(
+                lambda: nudge_pose(dbody_yaw=-NUDGE_BODY, label="L"),
+                outputs=[pose_state],
+            )
+            btn_center.click(center_pose, outputs=[pose_state])
+
+            # Wire up quick action buttons
+            btn_center_quick.click(center_pose, outputs=[pose_state])
+            btn_look_up.click(
+                lambda: nudge_pose(dpitch=-15, label="Look Up"),
+                outputs=[pose_state],
+            )
+            btn_curious.click(
+                lambda: nudge_pose(dpitch=-10, droll=15, label="Curious"),
+                outputs=[pose_state],
+            )
+            btn_excited.click(
+                lambda: nudge_pose(dpitch=-5, droll=-10, label="Excited"),
+                outputs=[pose_state],
+            )
+
+        # Right column - Video
+        with gr.Column(scale=2, elem_id="video-column"):
             robot_video = WebRTC(
+                label="",
                 modality="video",
                 mode="receive",
+                rtc_configuration=turn_credentials,
+                server_rtc_configuration=server_turn_credentials,
+                elem_id="robot-video",
             )
             robot_video.stream(
+                fn=webrtc_video_generator,
                 inputs=[],
                 outputs=[robot_video],
                 trigger=listen_btn.click,
             )
 
+    # Floating keyboard visualization
+    gr.HTML(KEYBOARD_VIZ_HTML)
+    gr.HTML(GAUGES_HTML)
+
+    # Load keyboard handler
+    demo.load(None, None, None, js=KEYBOARD_JS)
+
+# -------------------------------------------------------------------
+# 10. Mount & run
+# -------------------------------------------------------------------
+
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
     print("🚀 Server starting on http://0.0.0.0:7860")
+    print("ℹ️ Point your Robot/Sim to: ws://<YOUR_PC_IP>:7860/robot")
     uvicorn.run(app, host="0.0.0.0", port=7860, proxy_headers=True, forwarded_allow_ips="*")