andito HF Staff committed
Commit 156b337 · verified · Parent(s): a7d26ce
Update app.py
Files changed (1):
  1. app.py +536 -163
app.py CHANGED
@@ -18,7 +18,7 @@ import numpy as np
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 from fastapi.responses import StreamingResponse
 import uvicorn
-from fastrtc import WebRTC, get_cloudflare_turn_credentials_async
+from fastrtc import WebRTC, StreamHandler, get_cloudflare_turn_credentials_async, get_cloudflare_turn_credentials

 # Try to import the utility, handle error if running in standalone test without library
 try:
@@ -29,35 +29,45 @@ except ImportError:

 AUDIO_SAMPLE_RATE = 16000  # respeaker samplerate

-import struct
-
+import os
 async def get_credentials():
-    # Will use HF_TOKEN env var inside the Space
-    return await get_cloudflare_turn_credentials_async()
+    # Will use HF_TOKEN env var inside the Space. There is a limit of 10GB per month: https://fastrtc.org/deployment/
+    return await get_cloudflare_turn_credentials_async(hf_token=os.getenv("HF_TOKEN"))

-def gen_wav_header(sample_rate, bits_per_sample, channels):
-    """
-    Generates a generic WAV header.
-    We set the file size to 0xFFFFFFFF (max) to trick the browser into
-    thinking it's a very long file for streaming purposes.
-    """
-    datasize = 0xFFFFFFFF
-    o = bytes("RIFF", 'ascii')                                             # (4byte) Marks file as RIFF
-    o += struct.pack('<I', datasize + 36)                                  # (4byte) File size in bytes excluding "RIFF" and size
-    o += bytes("WAVE", 'ascii')                                            # (4byte) File type
-    o += bytes("fmt ", 'ascii')                                            # (4byte) Format Chunk Marker
-    o += struct.pack('<I', 16)                                             # (4byte) Length of above format data
-    o += struct.pack('<H', 1)                                              # (2byte) Format type (1 - PCM)
-    o += struct.pack('<H', channels)                                       # (2byte)
-    o += struct.pack('<I', sample_rate)                                    # (4byte)
-    o += struct.pack('<I', sample_rate * channels * bits_per_sample // 8)  # (4byte)
-    o += struct.pack('<H', channels * bits_per_sample // 8)                # (2byte)
-    o += struct.pack('<H', bits_per_sample)                                # (2byte)
-    o += bytes("data", 'ascii')                                            # (4byte) Data Chunk Marker
-    o += struct.pack('<I', datasize)                                       # (4byte) Data size in bytes
-    return o
-
-# --- 1. Global State Management ---
+
+# --- 1. Data Models & Presets ---
+@dataclass
+class Movement:
+    name: str
+    x: float = 0
+    y: float = 0
+    z: float = 0
+    roll: float = 0
+    pitch: float = 0
+    yaw: float = 0
+    body_yaw: float = 0
+    left_antenna: Optional[float] = None
+    right_antenna: Optional[float] = None
+    duration: float = 1.0
+
+PRESETS = {
+    "Home": Movement("Home", 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    "Look Left": Movement("Look Left", 0, 0, 0, 0, 0, 30, 1, 0, 0),
+    "Look Right": Movement("Look Right", 0, 0, 0, 0, 0, -30, -1, 0, 0),
+    "Look Up": Movement("Look Up", 0, 0, 0, 0, -20, 0, 0, 0, 0),
+    "Look Down": Movement("Look Down", 0, 0, 0, 0, 15, 0, 0, 0, 0),
+    "Curious": Movement("Curious", 10, 0, 10, 15, -10, -15, 0, 45, -45),
+    "Excited": Movement("Excited", 0, 0, 20, 0, -15, 0, 0, 90, 90),
+    "Shy": Movement("Shy", -10, 0, -10, 10, 10, 20, 0, -30, 30),
+}
+
+SEQUENCES = {
+    "Wave": ["Home", "Look Left", "Look Right", "Look Left", "Look Right", "Home"],
+    "Nod": ["Home", "Look Down", "Look Up", "Look Down", "Home"],
+    "Excited Dance": ["Home", "Excited", "Look Left", "Look Right", "Home"],
+}
+
+# --- 2. Global State Management ---
 class GlobalState:
     """
     Singleton-style class to manage shared state between FastAPI (WebSockets)
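
The preset and sequence tables above are keyed by name, so a sequence is just a list of preset lookups. A minimal standalone sketch of how they compose (trimmed Movement; resolve_sequence is illustrative, not a helper in app.py):

    from dataclasses import dataclass, replace

    @dataclass
    class Movement:                  # trimmed copy of the dataclass in the diff
        name: str
        yaw: float = 0
        duration: float = 1.0

    PRESETS = {"Home": Movement("Home"), "Look Left": Movement("Look Left", yaw=30)}
    SEQUENCES = {"Glance": ["Home", "Look Left", "Home"]}

    def resolve_sequence(seq_name: str, speed: float = 1.0) -> list[Movement]:
        """Expand a sequence of preset names into Movement steps, scaling duration by speed."""
        return [
            replace(PRESETS[name], duration=PRESETS[name].duration / speed)
            for name in SEQUENCES[seq_name]
        ]

    print([m.name for m in resolve_sequence("Glance", speed=2.0)])  # ['Home', 'Look Left', 'Home']
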
@@ -75,9 +85,26 @@ class GlobalState:
         self.latest_frame_bytes = buffer.tobytes()
         self.latest_frame_ts = time.time()

-        # Audio Stream Data
+        # Audio from robot -> browser.
+        # Queue of (sample_rate: int, raw int16 PCM bytes) tuples; see push_audio().
         self.audio_queue = queue.Queue()

+        # Audio from operator/server -> robot
+        self.audio_to_robot_queue = queue.Queue()
+
+        # --- Live pose state (for WASDQE control) ---
+        self.pose_lock = threading.Lock()
+        self.current_pose = Movement(
+            name="Current",
+            x=0, y=0, z=0,
+            roll=0, pitch=0, yaw=0,
+            body_yaw=0,
+            left_antenna=0,
+            right_antenna=0,
+            duration=0.2,
+        )
+
     def set_robot_connection(self, ws: WebSocket, loop: asyncio.AbstractEventLoop):
         self.robot_ws = ws
         self.robot_loop = loop
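
A quick round-trip of that queue contract: the producer side stores (sample_rate, raw int16 bytes), and the consumer rebuilds the (1, N) int16 array FastRTC expects. A standalone sketch, not code from app.py:

    import queue
    import numpy as np

    AUDIO_SAMPLE_RATE = 16000
    audio_queue: "queue.Queue[tuple[int, bytes]]" = queue.Queue()

    # Producer (what push_audio does): raw int16 PCM bytes straight off the socket.
    pcm = (np.sin(np.linspace(0, 1, 160)) * 32767).astype(np.int16).tobytes()
    audio_queue.put((AUDIO_SAMPLE_RATE, pcm))

    # Consumer (what RobotAudioHandler.emit does): bytes -> (1, N) int16 array.
    sample_rate, frame_bytes = audio_queue.get()
    audio = np.frombuffer(frame_bytes, dtype=np.int16).reshape(1, -1)
    print(sample_rate, audio.shape)  # 16000 (1, 160)
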
@@ -92,59 +119,148 @@ class GlobalState:
         Pushes raw audio bytes.
         If the queue is full (meaning we are lagging), throw away the OLDEST audio.
         """
-        # Limit queue to ~0.5 seconds of audio (approx 5-6 chunks of 4096 bytes)
-        MAX_QUEUE_SIZE = 6
-
+        MAX_QUEUE_SIZE = 2  # keep latency low
+
         while self.audio_queue.qsize() >= MAX_QUEUE_SIZE:
             try:
-                # Drop the oldest chunk to make room for the new one (Keep 'Now', drop 'Past')
-                print("Dropping oldest audio, queue size is", self.audio_queue.qsize())
+                print("Dropping oldest audio FROM robot, queue size is", self.audio_queue.qsize())
                 self.audio_queue.get_nowait()
             except queue.Empty:
-                pass
-
-        self.audio_queue.put(audio_bytes)
+                break
+
+        self.audio_queue.put((AUDIO_SAMPLE_RATE, audio_bytes))
+
+    def push_audio_to_robot(self, audio_bytes: bytes):
+        """
+        Audio coming FROM the operator/server, going TO the robot.
+        """
+        MAX_QUEUE_SIZE = 2
+        while self.audio_to_robot_queue.qsize() >= MAX_QUEUE_SIZE:
+            try:
+                print("Dropping oldest audio TO robot, queue size is", self.audio_to_robot_queue.qsize())
+                self.audio_to_robot_queue.get_nowait()
+            except queue.Empty:
+                break
+
+        self.audio_to_robot_queue.put(audio_bytes)
+
+    def get_audio_to_robot_blocking(self) -> bytes:
+        """
+        Blocking get for the sender task in /audio_stream.
+        """
+        return self.audio_to_robot_queue.get()

     def get_connection_status(self) -> str:
         if self.robot_ws:
             return "✅ Robot Connected"
         return "🔴 Waiting for Robot..."

-state = GlobalState()
+    def update_pose(
+        self,
+        dx: float = 0,
+        dy: float = 0,
+        dz: float = 0,
+        droll: float = 0,
+        dpitch: float = 0,
+        dyaw: float = 0,
+        dbody_yaw: float = 0,
+    ) -> Movement:
+        """
+        Apply a small delta to the current pose and return a new Movement.
+        This is what WASDQE will use.
+        """
+        with self.pose_lock:
+            p = self.current_pose
+
+            new = Movement(
+                name="Current",
+                x=p.x + dx,
+                y=p.y + dy,
+                z=p.z + dz,
+                roll=p.roll + droll,
+                pitch=p.pitch + dpitch,
+                yaw=p.yaw + dyaw,
+                body_yaw=p.body_yaw + dbody_yaw,
+                left_antenna=p.left_antenna,
+                right_antenna=p.right_antenna,
+                duration=0.4,
+            )
+
+            # Optional clamping (adjust ranges as you like)
+            new.pitch = float(np.clip(new.pitch, -30, 30))
+            new.yaw = float(np.clip(new.yaw, -180, 180))
+            new.roll = float(np.clip(new.roll, -40, 40))
+            new.body_yaw = float(np.clip(new.body_yaw, -3, 3))
+            new.z = float(np.clip(new.z, -20, 50))
+            new.x = float(np.clip(new.x, -50, 50))
+            new.y = float(np.clip(new.y, -50, 50))
+
+            self.current_pose = new
+            return new
+
+    def reset_pose(self) -> Movement:
+        """Back to neutral / home pose."""
+        with self.pose_lock:
+            self.current_pose = Movement(
+                name="Current",
+                x=0, y=0, z=0,
+                roll=0, pitch=0, yaw=0,
+                body_yaw=0,
+                left_antenna=0,
+                right_antenna=0,
+                duration=0.3,
+            )
+            return self.current_pose
+
+    def get_pose_text(self) -> str:
+        """Human-readable pose info to show in the UI."""
+        with self.pose_lock:
+            p = self.current_pose
+        return (
+            f"Head position:\n"
+            f"  x={p.x:.1f}, y={p.y:.1f}, z={p.z:.1f}\n"
+            f"  roll={p.roll:.1f}, pitch={p.pitch:.1f}, yaw={p.yaw:.1f}\n"
+            f"Body:\n"
+            f"  body_yaw={p.body_yaw:.1f}"
+        )
+
+state = GlobalState()
+
+def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
+    """
+    Convert Movement -> head pose + body_yaw payload and fire it to the robot.
+    Used by the WASDQE controls.
+    """
+    if not (state.robot_ws and state.robot_loop):
+        return state.get_pose_text(), "⚠️ Robot not connected"
+
+    pose = create_head_pose(
+        x=mov.x, y=mov.y, z=mov.z,
+        roll=mov.roll, pitch=mov.pitch, yaw=mov.yaw,
+        degrees=True, mm=True,
+    )
+
+    payload = {
+        "type": "movement",
+        "movement": {
+            "head": pose.tolist(),
+            "body_yaw": mov.body_yaw,
+            "duration": mov.duration,
+        },
+    }
+
+    if mov.left_antenna is not None and mov.right_antenna is not None:
+        payload["movement"]["antennas"] = [
+            np.deg2rad(mov.right_antenna),
+            np.deg2rad(mov.left_antenna),
+        ]
+
+    asyncio.run_coroutine_threadsafe(
+        state.robot_ws.send_json(payload),
+        state.robot_loop,
+    )
+
+    return state.get_pose_text(), f"✅ {msg}"

-# --- 2. Data Models & Presets ---
-@dataclass
-class Movement:
-    name: str
-    x: float = 0
-    y: float = 0
-    z: float = 0
-    roll: float = 0
-    pitch: float = 0
-    yaw: float = 0
-    body_yaw: float = 0
-    left_antenna: Optional[float] = None
-    right_antenna: Optional[float] = None
-    duration: float = 1.0
-
-PRESETS = {
-    "Home": Movement("Home", 0, 0, 0, 0, 0, 0, 0, 0, 0),
-    "Look Left": Movement("Look Left", 0, 0, 0, 0, 0, 30, 1, 0, 0),
-    "Look Right": Movement("Look Right", 0, 0, 0, 0, 0, -30, -1, 0, 0),
-    "Look Up": Movement("Look Up", 0, 0, 0, 0, -20, 0, 0, 0, 0),
-    "Look Down": Movement("Look Down", 0, 0, 0, 0, 15, 0, 0, 0, 0),
-    "Curious": Movement("Curious", 10, 0, 10, 15, -10, -15, 0, 45, -45),
-    "Excited": Movement("Excited", 0, 0, 20, 0, -15, 0, 0, 90, 90),
-    "Shy": Movement("Shy", -10, 0, -10, 10, 10, 20, 0, -30, 30),
-}
-
-SEQUENCES = {
-    "Wave": ["Home", "Look Left", "Look Right", "Look Left", "Look Right", "Home"],
-    "Nod": ["Home", "Look Down", "Look Up", "Look Down", "Home"],
-    "Excited Dance": ["Home", "Excited", "Look Left", "Look Right", "Home"],
-}

 # --- 3. Controller Logic ---
 class MovementManager:
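
The robot-side consumer of the movement payload built in send_pose_to_robot is not part of this file. A hedged sketch of what decoding it might look like (the 4x4 head matrix is inferred from create_head_pose/pose.tolist(); apply_pose is a stand-in for the actual robot API):

    import json
    import numpy as np

    def handle_server_message(raw: str, apply_pose) -> None:
        """Decode one control message from the server WebSocket (shape per send_pose_to_robot)."""
        msg = json.loads(raw)
        if msg.get("type") != "movement":
            return
        mov = msg["movement"]
        head = np.array(mov["head"])    # presumably a 4x4 homogeneous head pose
        body_yaw = mov["body_yaw"]
        duration = mov["duration"]
        antennas = mov.get("antennas")  # [right, left] in radians, optional
        apply_pose(head, body_yaw, antennas, duration)

    # Example: just log what we would execute.
    handle_server_message(
        '{"type": "movement", "movement": {"head": [[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]], "body_yaw": 0, "duration": 0.4}}',
        lambda head, body_yaw, antennas, duration: print(head.shape, body_yaw, antennas, duration),
    )
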
@@ -336,27 +452,120 @@ async def stream_endpoint(ws: WebSocket):

 @app.websocket("/audio_stream")
 async def audio_endpoint(ws: WebSocket):
-    """Endpoint for Robot/Sim to send raw Audio bytes (int16)."""
+    """Full duplex audio channel between Robot/Sim and server."""
     await ws.accept()
     print("[Audio] Stream Connected")
+
+    async def robot_to_server():
+        """Robot mic -> server -> state.audio_queue (-> WebRTC -> browser)."""
+        try:
+            while True:
+                data = await ws.receive()
+                t = data.get("type")
+
+                if t == "websocket.disconnect":
+                    print("[Audio] Disconnected (recv)")
+                    break
+
+                if t == "websocket.receive":
+                    if data.get("bytes"):
+                        # Audio FROM robot
+                        state.push_audio(data["bytes"])
+                    elif data.get("text") == "ping":
+                        print("[Audio] Received ping")
+                    else:
+                        print(f"[Audio] Received unknown message: {data}")
+        except Exception as e:
+            print(f"[Audio] robot_to_server error: {e}")
+
+    async def server_to_robot():
+        """Server/operator audio -> robot speaker via WebSocket."""
+        loop = asyncio.get_running_loop()
+        try:
+            while True:
+                chunk: bytes = await loop.run_in_executor(
+                    None, state.get_audio_to_robot_blocking
+                )
+                if chunk is None:
+                    continue
+                await ws.send_bytes(chunk)
+        except Exception as e:
+            print(f"[Audio] server_to_robot error: {e}")
+
     try:
-        while True:
-            # Receive bytes directly
-            data = await ws.receive()
-            if data.get('type') == 'websocket.receive' and data.get('bytes'):
-                state.push_audio(data.get('bytes'))
-            elif data.get('type') == 'websocket.receive' and data.get('text') == "ping":
-                print("[Audio] Received ping")
-            elif data.get('type') == 'websocket.disconnect':
-                print("[Audio] Disconnected")
-                break
-            else:
-                print(f"[Audio] Received unknown message: {data}")
-    except Exception as e:
-        print(f"[Audio] Disconnected: {e}")
+        await asyncio.gather(robot_to_server(), server_to_robot())
+    finally:
+        print("[Audio] Stream Closed")

 # --- 5. Gradio Interface ---

+class RobotAudioHandler(StreamHandler):
+    """
+    FastRTC handler that connects browser WebRTC audio to the robot.
+
+    - receive(): audio from browser mic -> state.audio_to_robot_queue (then /audio_stream sends it to the robot)
+    - emit(): audio from state.audio_queue (filled by /audio_stream robot_to_server) -> browser playback
+    """
+
+    def __init__(self) -> None:
+        super().__init__(input_sample_rate=AUDIO_SAMPLE_RATE, output_sample_rate=AUDIO_SAMPLE_RATE)
+
+    def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        """Called whenever the browser sends audio."""
+        if frame is None:
+            return
+
+        sr, array = frame
+        if array is None:
+            return
+
+        arr = np.asarray(array)
+
+        # Ensure mono
+        if arr.ndim > 1:
+            arr = arr[0]
+
+        # Convert to int16 and then to bytes for the robot
+        if arr.dtype != np.int16:
+            if np.issubdtype(arr.dtype, np.floating):
+                arr = np.clip(arr, -1.0, 1.0)
+                arr = (arr * 32767.0).astype(np.int16)
+            else:
+                arr = arr.astype(np.int16)
+
+        state.push_audio_to_robot(arr.tobytes())
+
+    def emit(self):
+        """
+        Called repeatedly by FastRTC to get audio to send to the browser.
+
+        Should return (sample_rate, np.ndarray[int16]) or None.
+        """
+        try:
+            sample_rate, frame_bytes = state.audio_queue.get(timeout=0.5)
+            audio = np.frombuffer(frame_bytes, dtype=np.int16).reshape(1, -1)
+            return sample_rate, audio
+        except queue.Empty:
+            # No audio right now, tell FastRTC to skip sending
+            return None
+
+    def copy(self) -> "RobotAudioHandler":
+        """
+        FastRTC will call this when it needs a new handler for a new session.
+        The handler itself is stateless; it always looks at GlobalState.
+        """
+        return RobotAudioHandler()
+
+    def shutdown(self) -> None:
+        """Called on session shutdown. Nothing to clean up for now."""
+        pass
+
+    def start_up(self) -> None:
+        """Called on session startup. Nothing special to do."""
+        pass
+
+
 def webrtc_audio_generator():
     """
     Generator for FastRTC.
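
The counterpart client for the full-duplex /audio_stream endpoint lives outside this commit. A minimal sketch of the robot side using the websockets library (the host, chunking, and the mic_stream/speaker objects are illustrative assumptions):

    import asyncio
    import websockets

    SERVER = "wss://<space-host>/audio_stream"  # placeholder host

    async def robot_audio_client(mic_stream, speaker):
        """mic_stream yields raw int16 PCM bytes; speaker.play consumes them."""
        async with websockets.connect(SERVER) as ws:
            async def uplink():
                async for chunk in mic_stream:   # robot mic -> server
                    await ws.send(chunk)

            async def downlink():
                async for msg in ws:             # server/operator -> robot speaker
                    if isinstance(msg, bytes):
                        speaker.play(msg)

            await asyncio.gather(uplink(), downlink())
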
@@ -375,7 +584,7 @@ def webrtc_audio_generator():
         try:
             # Wait up to 1 second for data. If no data, loop again.
             # Do NOT use a short timeout combined with silence generation.
-            chunk_bytes = state.audio_queue.get(timeout=1.0)
+            sample_rate, chunk_bytes = state.audio_queue.get(timeout=1.0)
             if chunk_bytes:
                 byte_buffer.extend(chunk_bytes)
         except queue.Empty:
@@ -394,6 +603,35 @@ def webrtc_audio_generator():

     yield (AUDIO_SAMPLE_RATE, audio_int16)

+
+def handle_operator_audio(sr: int, audio: np.ndarray):
+    """
+    Called continuously by FastRTC when the browser sends mic audio.
+
+    `audio` is expected to be shape (channels, samples) or (samples,)
+    with dtype int16 or float32, depending on FastRTC config.
+    """
+    if audio is None:
+        return
+
+    arr = np.asarray(audio)
+    # Ensure mono and int16
+    if arr.ndim > 1:
+        arr = arr[0]  # take first channel
+
+    if arr.dtype != np.int16:
+        # For float32 in [-1, 1]
+        if np.issubdtype(arr.dtype, np.floating):
+            arr = np.clip(arr, -1.0, 1.0)
+            arr = (arr * 32767.0).astype(np.int16)
+        else:
+            arr = arr.astype(np.int16)
+
+    state.push_audio_to_robot(arr.tobytes())
+    # No UI output
+    return
+
+
 def webrtc_video_generator():
     """
     Generator for FastRTC WebRTC (mode='receive', modality='video').
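
The float-to-int16 path above mirrors RobotAudioHandler.receive(); a quick standalone check of the scale-and-clip behaviour:

    import numpy as np

    def to_int16(arr: np.ndarray) -> np.ndarray:
        """Same conversion as handle_operator_audio: clip to [-1, 1], scale to int16 range."""
        if np.issubdtype(arr.dtype, np.floating):
            return (np.clip(arr, -1.0, 1.0) * 32767.0).astype(np.int16)
        return arr.astype(np.int16)

    print(to_int16(np.array([0.0, 0.5, 1.0, 1.5], dtype=np.float32)))
    # [    0 16383 32767 32767] -- overdriven samples clip instead of wrapping around
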
@@ -417,6 +655,64 @@ def webrtc_video_generator():
         # Shape (H, W, 3), dtype uint8
         yield frame

+NUDGE_POS = 5.0     # mm or arbitrary units
+NUDGE_HEIGHT = 5.0  # degrees (used as the W/S pitch step)
+NUDGE_ANGLE = 5.0   # degrees
+NUDGE_BODY = 0.3    # degrees for body_yaw
+
+def move_w():
+    """W: pitch the head up (negative pitch) by one nudge."""
+    mov = state.update_pose(dpitch=-NUDGE_HEIGHT)
+    return send_pose_to_robot(mov, "W (look up)")
+
+def move_s():
+    """S: pitch the head down (positive pitch) by one nudge."""
+    mov = state.update_pose(dpitch=NUDGE_HEIGHT)
+    return send_pose_to_robot(mov, "S (look down)")
+
+def move_a():
+    """A: yaw the head left."""
+    mov = state.update_pose(dyaw=NUDGE_ANGLE * 2)
+    return send_pose_to_robot(mov, "A (turn left)")
+
+def move_d():
+    """D: yaw the head right."""
+    mov = state.update_pose(dyaw=-NUDGE_ANGLE * 2)
+    return send_pose_to_robot(mov, "D (turn right)")
+
+def move_q():
+    """Q: roll the head left (negative roll)."""
+    mov = state.update_pose(droll=-NUDGE_ANGLE)
+    return send_pose_to_robot(mov, "Q (tilt left)")
+
+def move_e():
+    """E: roll the head right (positive roll)."""
+    mov = state.update_pose(droll=NUDGE_ANGLE)
+    return send_pose_to_robot(mov, "E (tilt right)")
+
+def move_body_left():
+    mov = state.update_pose(dbody_yaw=NUDGE_BODY)
+    return send_pose_to_robot(mov, "Body Left (<)")
+
+def move_body_right():
+    mov = state.update_pose(dbody_yaw=-NUDGE_BODY)
+    return send_pose_to_robot(mov, "Body Right (>)")
+
+def center_pose():
+    mov = state.reset_pose()
+    return send_pose_to_robot(mov, "Reset pose")
+
+
 with gr.Blocks(title="Reachy Controller", theme=gr.themes.Soft()) as demo:

     gr.Markdown("## 🤖 Reachy Mini Controller")
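
NUDGE_POS is defined above but never used by these handlers. If x/y translation keys are planned, they would presumably reuse the same plumbing; a hypothetical pair that would drop into app.py next to the other handlers (sign conventions unverified):

    def move_head_left():
        mov = state.update_pose(dy=NUDGE_POS)   # sign convention is an assumption
        return send_pose_to_robot(mov, "Head left")

    def move_head_right():
        mov = state.update_pose(dy=-NUDGE_POS)
        return send_pose_to_robot(mov, "Head right")
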
@@ -425,7 +721,12 @@ with gr.Blocks(title="Reachy Controller", theme=gr.themes.Soft()) as demo:
     # --- LEFT COLUMN: Controls ---
     with gr.Column(scale=1):
         status_box = gr.Textbox(label="System Status", value=state.get_connection_status, every=2)
-
+        pose_box = gr.Textbox(
+            label="Current Pose",
+            value=state.get_pose_text,
+            every=0.5,
+            lines=8,
+        )
         with gr.Group():
             gr.Markdown("### 🎧 Audio Listen")

@@ -436,105 +737,177 @@ with gr.Blocks(title="Reachy Controller", theme=gr.themes.Soft()) as demo:
             robot_audio = WebRTC(
                 label="Robot Audio",
                 modality="audio",
-                mode="receive",
-                # Optional niceties:
-                # icon="phone-solid.svg",
-                # icon_button_color="black",
-                # pulse_color="black",
+                mode="send-receive",
+                rtc_configuration=get_cloudflare_turn_credentials(),
+                server_rtc_configuration=get_cloudflare_turn_credentials(ttl=360_000),
+                full_screen=False,
             )

-            # Wire generator to WebRTC component
+            # Use the handler directly, like in the FastRTC docs
             robot_audio.stream(
-                fn=lambda: webrtc_audio_generator(),
-                inputs=None,
+                fn=RobotAudioHandler(),
+                inputs=[robot_audio],
                 outputs=[robot_audio],
-                trigger=listen_btn.click,
+                time_limit=60,
             )

-        with gr.Group():
-            gr.Markdown("### 🎮 Playback")
-            auto_play = gr.Checkbox(label="Auto-play", value=True)
-            speed = gr.Slider(0.5, 2.0, 1.0, label="Speed")
-
-            with gr.Row():
-                play_btn = gr.Button("▶️ Play", variant="primary")
-                stop_btn = gr.Button("⏹️ Stop")
-
-            with gr.Row():
-                clear_btn = gr.Button("🗑️ Clear")
-                undo_btn = gr.Button("↶ Undo")
-
-            queue_display = gr.Textbox(label="Queue", value=manager.get_queue_text, lines=10)
+        # with gr.Group():
+        #     gr.Markdown("### 🎮 Playback")
+        #     auto_play = gr.Checkbox(label="Auto-play", value=True)
+        #     speed = gr.Slider(0.5, 2.0, 1.0, label="Speed")

+        #     with gr.Row():
+        #         play_btn = gr.Button("▶️ Play", variant="primary")
+        #         stop_btn = gr.Button("⏹️ Stop")

+        #     with gr.Row():
+        #         clear_btn = gr.Button("🗑️ Clear")
+        #         undo_btn = gr.Button("↶ Undo")

+        #     queue_display = gr.Textbox(label="Queue", value=manager.get_queue_text, lines=10)
+
+        # --- Live movement control ---
+        with gr.Group():
+            gr.Markdown("### 🕹️ Keyboard Control (WASD + QE)")
+
+            # These buttons will be triggered by keyboard events via JS
+            btn_forward = gr.Button("Look up (W)", elem_id="btn-forward")
+            btn_back = gr.Button("Look down (S)", elem_id="btn-back")
+            btn_left = gr.Button("Left (A)", elem_id="btn-left")
+            btn_right = gr.Button("Right (D)", elem_id="btn-right")
+            btn_tilt_up = gr.Button("Tilt left (Q)", elem_id="btn-tilt-up")
+            btn_tilt_down = gr.Button("Tilt right (E)", elem_id="btn-tilt-down")
+            btn_body_left = gr.Button("Body Left (J)", elem_id="btn-body-left")
+            btn_body_right = gr.Button("Body Right (L)", elem_id="btn-body-right")
+            btn_center = gr.Button("Center (H)", elem_id="btn-center")
+
+            # Each handler returns (pose text, status message), so wire both boxes
+            btn_forward.click(move_w, outputs=[pose_box, status_box])
+            btn_back.click(move_s, outputs=[pose_box, status_box])
+            btn_left.click(move_a, outputs=[pose_box, status_box])
+            btn_right.click(move_d, outputs=[pose_box, status_box])
+            btn_tilt_up.click(move_q, outputs=[pose_box, status_box])
+            btn_tilt_down.click(move_e, outputs=[pose_box, status_box])
+            btn_body_left.click(move_body_left, outputs=[pose_box, status_box])
+            btn_body_right.click(move_body_right, outputs=[pose_box, status_box])
+            btn_center.click(center_pose, outputs=[pose_box, status_box])

     # --- RIGHT COLUMN: View ---
     with gr.Column(scale=2):
-        # robot_video = WebRTC(
-        #     label="Robot Video",
-        #     modality="video",
-        #     mode="receive",
-        # )
-        # robot_video.stream(
-        #     fn=lambda: webrtc_video_generator(),
-        #     inputs=None,
-        #     outputs=[robot_video],
-        #     trigger=listen_btn.click,
-        # )
-        html_code = """
-        <html>
-        <body>
-        <img src="/video_feed" style="width: 100%; max-width: 1080px; border-radius: 8px;">
-        </body>
-        </html>
-        """
-        sim_view = gr.HTML(value=html_code, label="🎬 Robot Simulation")
-
-        # --- Movement Builders ---
-        with gr.Tabs():
-            with gr.Tab("✨ Presets & Sequences"):
-                gr.Markdown("### Quick Actions")
-                with gr.Row(variant="panel"):
-                    for name in PRESETS:
-                        btn = gr.Button(name, size="sm")
-                        btn.click(manager.add_preset, inputs=[gr.State(name)], outputs=[queue_display, status_box])
-
-                gr.Markdown("### Sequences")
-                with gr.Row():
-                    for seq in SEQUENCES:
-                        btn = gr.Button(f"🎬 {seq}", size="sm")
-                        btn.click(manager.add_sequence, inputs=[gr.State(seq)], outputs=[queue_display, status_box])
-
-            with gr.Tab("🛠️ Custom Move"):
-                with gr.Row():
-                    c_x = gr.Slider(-50, 50, 0, label="X")
-                    c_y = gr.Slider(-50, 50, 0, label="Y")
-                    c_z = gr.Slider(-20, 50, 0, label="Z")
-                with gr.Row():
-                    c_r = gr.Slider(-30, 30, 0, label="Roll")
-                    c_p = gr.Slider(-30, 30, 0, label="Pitch")
-                    c_y_aw = gr.Slider(-45, 45, 0, label="Yaw")
-                with gr.Row():
-                    c_la = gr.Slider(-180, 180, 0, label="Left Ant")
-                    c_ra = gr.Slider(-180, 180, 0, label="Right Ant")
-
-                c_dur = gr.Slider(0.1, 5.0, 1.0, label="Duration")
-                c_add = gr.Button("➕ Add Custom Move", variant="primary")
-
-                def _add_custom(x,y,z,r,p,yw,la,ra,d):
-                    m = Movement("Custom", x,y,z,r,p,yw,la,ra,d)
-                    return manager.add_movement(m)
-
-                c_add.click(_add_custom,
-                            inputs=[c_x, c_y, c_z, c_r, c_p, c_y_aw, c_la, c_ra, c_dur],
-                            outputs=[queue_display, status_box])
+        robot_video = WebRTC(
+            label="Robot Video",
+            modality="video",
+            mode="receive",
+            rtc_configuration=get_cloudflare_turn_credentials(),
+            server_rtc_configuration=get_cloudflare_turn_credentials(ttl=360_000)
+        )
+        robot_video.stream(
+            fn=lambda: webrtc_video_generator(),
+            inputs=[],
+            outputs=[robot_video],
+            trigger=listen_btn.click,
+        )
+        # html_code = """
+        # <html>
+        # <body>
+        # <img src="/video_feed" style="width: 100%; max-width: 1080px; border-radius: 8px;">
+        # </body>
+        # </html>
+        # """
+        # sim_view = gr.HTML(value=html_code, label="🎬 Robot Simulation")
+
+        # # --- Movement Builders ---
+        # with gr.Tabs():
+        #     with gr.Tab("✨ Presets & Sequences"):
+        #         gr.Markdown("### Quick Actions")
+        #         with gr.Row(variant="panel"):
+        #             for name in PRESETS:
+        #                 btn = gr.Button(name, size="sm")
+        #                 btn.click(manager.add_preset, inputs=[gr.State(name)], outputs=[queue_display, status_box])

+        #         gr.Markdown("### Sequences")
+        #         with gr.Row():
+        #             for seq in SEQUENCES:
+        #                 btn = gr.Button(f"🎬 {seq}", size="sm")
+        #                 btn.click(manager.add_sequence, inputs=[gr.State(seq)], outputs=[queue_display, status_box])
+
+        #     with gr.Tab("🛠️ Custom Move"):
+        #         with gr.Row():
+        #             c_x = gr.Slider(-50, 50, 0, label="X")
+        #             c_y = gr.Slider(-50, 50, 0, label="Y")
+        #             c_z = gr.Slider(-20, 50, 0, label="Z")
+        #         with gr.Row():
+        #             c_r = gr.Slider(-30, 30, 0, label="Roll")
+        #             c_p = gr.Slider(-30, 30, 0, label="Pitch")
+        #             c_y_aw = gr.Slider(-45, 45, 0, label="Yaw")
+        #         with gr.Row():
+        #             c_la = gr.Slider(-180, 180, 0, label="Left Ant")
+        #             c_ra = gr.Slider(-180, 180, 0, label="Right Ant")

+        #         c_dur = gr.Slider(0.1, 5.0, 1.0, label="Duration")
+        #         c_add = gr.Button("➕ Add Custom Move", variant="primary")

+        #         def _add_custom(x,y,z,r,p,yw,la,ra,d):
+        #             m = Movement("Custom", x,y,z,r,p,yw,la,ra,d)
+        #             return manager.add_movement(m)

+        #         c_add.click(_add_custom,
+        #                     inputs=[c_x, c_y, c_z, c_r, c_p, c_y_aw, c_la, c_ra, c_dur],
+        #                     outputs=[queue_display, status_box])

     # --- Event Wiring ---
-    auto_play.change(lambda x: setattr(manager, 'auto_play', x), inputs=[auto_play])
-    play_btn.click(manager.play_queue, inputs=[speed], outputs=[queue_display, status_box])
-    stop_btn.click(manager.stop_playback, outputs=[queue_display, status_box])
-    clear_btn.click(manager.clear_queue, outputs=[queue_display, status_box])
-    undo_btn.click(manager.remove_last, outputs=[queue_display, status_box])
+    # auto_play.change(lambda x: setattr(manager, 'auto_play', x), inputs=[auto_play])
+    # play_btn.click(manager.play_queue, inputs=[speed], outputs=[queue_display, status_box])
+    # stop_btn.click(manager.stop_playback, outputs=[queue_display, status_box])
+    # clear_btn.click(manager.clear_queue, outputs=[queue_display, status_box])
+    # undo_btn.click(manager.remove_last, outputs=[queue_display, status_box])
+
+    demo.load(
+        None,
+        None,
+        None,
+        js="""
+        () => {
+            const keyMap = {
+                'w': 'btn-forward',
+                's': 'btn-back',
+                'a': 'btn-left',
+                'd': 'btn-right',
+                'q': 'btn-tilt-up',
+                'e': 'btn-tilt-down',
+                'h': 'btn-center',
+                'j': 'btn-body-left',
+                'l': 'btn-body-right',
+            };
+
+            let lastPressed = {};
+            const REPEAT_MS = 120;  // minimum time between repeated presses
+
+            document.addEventListener('keydown', (ev) => {
+                const key = ev.key.toLowerCase();
+                const id = keyMap[key];
+                if (!id) return;
+
+                const now = Date.now();
+                if (lastPressed[key] && now - lastPressed[key] < REPEAT_MS) {
+                    return;  // simple debounce
+                }
+                lastPressed[key] = now;
+
+                // Prevent default browser actions for handled keys
+                ev.preventDefault();
+
+                const btn = document.getElementById(id);
+                if (btn) {
+                    btn.click();
+                }
+            });
+
+            console.log('Keyboard control ready: W/S pitch, A/D yaw, Q/E roll, J/L body yaw, H center');
+        }
+        """,
+    )

 # --- 6. Mount & Run ---
 app = gr.mount_gradio_app(app, demo, path="/")
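
The diff stops at the mount call; the usual tail for a FastAPI-plus-Gradio Space looks like the following (assumed, not part of this commit):

    # Typical launch for a FastAPI + Gradio Space (port and guard are assumptions).
    if __name__ == "__main__":
        uvicorn.run(app, host="0.0.0.0", port=7860)
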
 