andito HF Staff committed on
Commit 113c326 · verified · Parent(s): 156b337

Update app.py

Files changed (1):
  1. app.py +703 -575

app.py CHANGED
@@ -1,16 +1,14 @@
 """
-Reachy Mini Controller - Cleaned Version
 A centralized server that listens for Robot connections and hosts a Gradio control interface.
 """
 
 import asyncio
-import io
-import json
 import threading
 import time
 import queue
 from dataclasses import dataclass
-from typing import List, Optional
 
 import cv2
 import gradio as gr
@@ -18,24 +16,40 @@ import numpy as np
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 from fastapi.responses import StreamingResponse
 import uvicorn
-from fastrtc import WebRTC, StreamHandler, get_cloudflare_turn_credentials_async, get_cloudflare_turn_credentials
 
-# Try to import the utility, handle error if running in standalone test without library
-try:
-    from reachy_mini.utils import create_head_pose
-except ImportError:
-    print("⚠️ Warning: reachy_mini module not found. Mocking create_head_pose for testing.")
-    def create_head_pose(**kwargs): return np.array([0,0,0])
 
 AUDIO_SAMPLE_RATE = 16000  # respeaker samplerate
 
-import os
-async def get_credentials():
-    # Will use HF_TOKEN env var inside the Space. There is a limit of 10GB per month: https://fastrtc.org/deployment/
-    return await get_cloudflare_turn_credentials_async(hf_token=os.getenv("HF_TOKEN"))
 
 
-# --- 1. Data Models & Presets ---
 
 @dataclass
 class Movement:
     name: str
@@ -50,110 +64,109 @@ class Movement:
     right_antenna: Optional[float] = None
     duration: float = 1.0
 
-PRESETS = {
-    "Home": Movement("Home", 0, 0, 0, 0, 0, 0, 0, 0, 0),
-    "Look Left": Movement("Look Left", 0, 0, 0, 0, 0, 30, 1, 0, 0),
-    "Look Right": Movement("Look Right", 0, 0, 0, 0, 0, -30, -1, 0, 0),
-    "Look Up": Movement("Look Up", 0, 0, 0, 0, -20, 0, 0, 0, 0),
-    "Look Down": Movement("Look Down", 0, 0, 0, 0, 15, 0, 0, 0, 0),
-    "Curious": Movement("Curious", 10, 0, 10, 15, -10, -15, 0, 45, -45),
-    "Excited": Movement("Excited", 0, 0, 20, 0, -15, 0, 0, 90, 90),
-    "Shy": Movement("Shy", -10, 0, -10, 10, 10, 20, 0, -30, 30),
-}
 
-SEQUENCES = {
-    "Wave": ["Home", "Look Left", "Look Right", "Look Left", "Look Right", "Home"],
-    "Nod": ["Home", "Look Down", "Look Up", "Look Down", "Home"],
-    "Excited Dance": ["Home", "Excited", "Look Left", "Look Right", "Home"],
-}
 
-# --- 2. Global State Management ---
 class GlobalState:
     """
     Singleton-style class to manage shared state between FastAPI (WebSockets)
-    and Gradio (UI Thread).
     """
     def __init__(self):
         # Connection handles
         self.robot_ws: Optional[WebSocket] = None
         self.robot_loop: Optional[asyncio.AbstractEventLoop] = None
-
         # Video Stream Data
         self.frame_lock = threading.Lock()
         self.black_frame = np.zeros((640, 640, 3), dtype=np.uint8)
-        _, buffer = cv2.imencode('.jpg', self.black_frame)
         self.latest_frame_bytes = buffer.tobytes()
         self.latest_frame_ts = time.time()
 
         # Audio from robot -> browser
-        # Queue of (sample_rate: int, audio: np.ndarray[int16, shape=(1, N)])
-        self.audio_queue = queue.Queue()
 
-        # Audio from operator/server -> robot
-        self.audio_to_robot_queue = queue.Queue()
 
-        # --- Live pose state (for WASDQE control) ---
         self.pose_lock = threading.Lock()
         self.current_pose = Movement(
             name="Current",
-            x=0, y=0, z=0,
-            roll=0, pitch=0, yaw=0,
             body_yaw=0,
             left_antenna=0,
             right_antenna=0,
             duration=0.2,
         )
 
-    def set_robot_connection(self, ws: WebSocket, loop: asyncio.AbstractEventLoop):
         self.robot_ws = ws
         self.robot_loop = loop
 
-    def update_frame(self, frame_bytes: bytes):
         with self.frame_lock:
             self.latest_frame_bytes = frame_bytes
             self.latest_frame_ts = time.time()
 
-    def push_audio(self, audio_bytes: bytes):
-        """
-        Pushes raw audio bytes.
-        If the queue is full (meaning we are lagging), throw away the OLDEST audio.
-        """
-        MAX_QUEUE_SIZE = 2  # keep latency low
-
-        while self.audio_queue.qsize() >= MAX_QUEUE_SIZE:
-            try:
-                print("Dropping oldest audio FROM robot, queue size is", self.audio_queue.qsize())
-                self.audio_queue.get_nowait()
-            except queue.Empty:
-                break
-
-        self.audio_queue.put((AUDIO_SAMPLE_RATE, audio_bytes))
 
-    def push_audio_to_robot(self, audio_bytes: bytes):
-        """
-        Audio coming FROM the operator/server, going TO the robot.
-        """
-        MAX_QUEUE_SIZE = 2
-        while self.audio_to_robot_queue.qsize() >= MAX_QUEUE_SIZE:
             try:
-                print("Dropping oldest audio TO robot, queue size is", self.audio_to_robot_queue.qsize())
-                self.audio_to_robot_queue.get_nowait()
             except queue.Empty:
                 break
 
-        self.audio_to_robot_queue.put(audio_bytes)
 
     def get_audio_to_robot_blocking(self) -> bytes:
-        """
-        Blocking get for the sender task in /audio_stream.
-        """
-        return self.audio_to_robot_queue.get()
 
     def get_connection_status(self) -> str:
-        if self.robot_ws:
-            return "✅ Robot Connected"
-        return "🔴 Waiting for Robot..."
 
     def update_pose(
         self,
@@ -165,10 +178,6 @@ class GlobalState:
         dyaw: float = 0,
         dbody_yaw: float = 0,
     ) -> Movement:
-        """
-        Apply a small delta to the current pose and return a new Movement.
-        This is what WASDQE will use.
-        """
         with self.pose_lock:
             p = self.current_pose
@@ -186,7 +195,7 @@ class GlobalState:
             duration=0.4,
         )
 
-        # Optional clamping (adjust ranges as you like)
         new.pitch = float(np.clip(new.pitch, -30, 30))
         new.yaw = float(np.clip(new.yaw, -180, 180))
         new.roll = float(np.clip(new.roll, -40, 40))
@@ -199,12 +208,15 @@ class GlobalState:
         return new
 
     def reset_pose(self) -> Movement:
-        """Back to neutral / home pose."""
        with self.pose_lock:
             self.current_pose = Movement(
                 name="Current",
-                x=0, y=0, z=0,
-                roll=0, pitch=0, yaw=0,
                 body_yaw=0,
                 left_antenna=0,
                 right_antenna=0,
@@ -213,31 +225,37 @@ class GlobalState:
         return self.current_pose
 
     def get_pose_text(self) -> str:
-        """Human-readable pose info to show in the UI."""
         with self.pose_lock:
             p = self.current_pose
             return (
-                f"Head position:\n"
                 f"  x={p.x:.1f}, y={p.y:.1f}, z={p.z:.1f}\n"
                 f"  roll={p.roll:.1f}, pitch={p.pitch:.1f}, yaw={p.yaw:.1f}\n"
-                f"Body:\n"
                 f"  body_yaw={p.body_yaw:.1f}"
             )
 
 state = GlobalState()
 
 def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
-    """
-    Convert Movement -> head pose + body_yaw payload and fire it to the robot.
-    Used by the WASDQE controls.
-    """
     if not (state.robot_ws and state.robot_loop):
         return state.get_pose_text(), "⚠️ Robot not connected"
 
     pose = create_head_pose(
-        x=mov.x, y=mov.y, z=mov.z,
-        roll=mov.roll, pitch=mov.pitch, yaw=mov.yaw,
-        degrees=True, mm=True,
     )
 
     payload = {
@@ -262,182 +280,88 @@ def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
 
     return state.get_pose_text(), f"✅ {msg}"
 
-# --- 3. Controller Logic ---
-class MovementManager:
-    def __init__(self):
-        self.queue: List[Movement] = []
-        self.is_playing = False
-        self.auto_play = True
-        self.playback_speed = 1.0
-        self.play_thread: Optional[threading.Thread] = None
-
-    def add_movement(self, mov: Movement):
-        self.queue.append(mov)
-        if self.auto_play and not self.is_playing:
-            self.play_queue()
-        return self.get_queue_text(), f"✅ Added {mov.name}"
-
-    def add_preset(self, name: str):
-        if name in PRESETS:
-            return self.add_movement(PRESETS[name])
-        return self.get_queue_text(), "❌ Unknown Preset"
-
-    def add_sequence(self, name: str):
-        if name in SEQUENCES:
-            for move_name in SEQUENCES[name]:
-                self.queue.append(PRESETS[move_name])
-            if self.auto_play and not self.is_playing:
-                self.play_queue()
-            return self.get_queue_text(), f"✅ Added Sequence: {name}"
-        return self.get_queue_text(), "❌ Unknown Sequence"
-
-    def clear_queue(self):
-        self.queue.clear()
-        self.is_playing = False
-        return self.get_queue_text(), "🗑️ Queue Cleared"
-
-    def remove_last(self):
-        if self.queue:
-            self.queue.pop()
-        return self.get_queue_text(), "🗑️ Removed Last"
-
-    def get_queue_text(self):
-        if not self.queue:
-            return "📋 Queue Empty"
-
-        lines = ["📋 Current Queue:"]
-        for i, m in enumerate(self.queue, 1):
-            indicator = "▶️" if (i==1 and self.is_playing) else " "
-            lines.append(f"{indicator} {i}. {m.name} ({m.duration}s)")
-
-        return "\n".join(lines)
-
-    def play_queue(self, speed=None):
-        if speed: self.playback_speed = speed
-        if self.is_playing: return self.get_queue_text(), "⚠️ Already Playing"
-        if not self.queue: return self.get_queue_text(), "⚠️ Queue Empty"
-
-        self.is_playing = True
-        self.play_thread = threading.Thread(target=self._worker, daemon=True)
-        self.play_thread.start()
-        return self.get_queue_text(), "▶️ Playing..."
-
-    def stop_playback(self):
-        self.is_playing = False
-        if self.play_thread:
-            self.play_thread.join(timeout=1.0)
-        return self.get_queue_text(), "⏹️ Stopped"
-
-    def _worker(self):
-        """Background thread that processes the queue."""
-        idx = 0
-        try:
-            while self.is_playing and idx < len(self.queue):
-                move = self.queue[idx]
-
-                # 1. Build Payload
-                pose = create_head_pose(
-                    x=move.x, y=move.y, z=move.z,
-                    roll=move.roll, pitch=move.pitch, yaw=move.yaw,
-                    degrees=True, mm=True
-                )
-
-                payload = {
-                    "type": "movement",
-                    "movement": {
-                        "head": pose.tolist(),
-                        "body_yaw": move.body_yaw,
-                        "duration": move.duration / self.playback_speed
-                    }
-                }
-
-                # Add antennas if specified
-                if move.left_antenna is not None and move.right_antenna is not None:
-                    payload["movement"]["antennas"] = [
-                        np.deg2rad(move.right_antenna),
-                        np.deg2rad(move.left_antenna)
-                    ]
-
-                # 2. Send to Robot (Async safe)
-                if state.robot_ws and state.robot_loop:
-                    asyncio.run_coroutine_threadsafe(
-                        state.robot_ws.send_json(payload),
-                        state.robot_loop
-                    )
-                else:
-                    print("⚠️ Robot not connected, skipping command.")
-
-                # 3. Wait for move to finish (blocking thread, not async loop)
-                time.sleep(move.duration / self.playback_speed)
-                idx += 1
 
-            # Loop finished
-            if not self.auto_play:
-                self.is_playing = False
-            else:
-                # In auto-play, we stay "playing" but wait for new items
-                self.is_playing = False
 
-        except Exception as e:
-            print(f"Playback Error: {e}")
-            self.is_playing = False
 
-    def generate_mjpeg_stream(self):
-        last_timestamp = 0.0
-
-        while True:
-            # 1. Check if frame has changed
-            with state.frame_lock:
-                current_bytes = state.latest_frame_bytes
-                current_timestamp = state.latest_frame_ts
-
-            # 2. Only yield if this is a new frame
-            if current_timestamp > last_timestamp:
-                last_timestamp = current_timestamp
-                if current_bytes is not None:
-                    yield (b'--frame\r\n'
-                           b'Content-Type: image/jpeg\r\n\r\n' + current_bytes + b'\r\n')
             else:
-                # If no new frame, sleep a bit longer to save CPU
-                time.sleep(0.02)
-                continue
-
-            # Cap FPS slightly to prevent saturation
-            time.sleep(0.02)
 
-manager = MovementManager()
 
-# --- 4. FastAPI & WebSocket Logic ---
 app = FastAPI()
 
 @app.websocket("/robot")
 async def robot_endpoint(ws: WebSocket):
-    """Endpoint for the Robot to connect to (Control Channel)."""
     await ws.accept()
     state.set_robot_connection(ws, asyncio.get_running_loop())
-    print("[System] Robot Connected!")
-
     try:
-        # Heartbeat loop
         while True:
-            # We wait for messages, but mostly we just hold the connection open
-            # and send commands via the state.robot_ws handle.
-            msg = await ws.receive()
-            if msg["type"] == "websocket.disconnect":
                 break
     except (WebSocketDisconnect, Exception):
         print("[System] Robot Disconnected")
     finally:
-        state.robot_ws = None
 
 @app.get("/video_feed")
 def video_feed():
     return StreamingResponse(
-        manager.generate_mjpeg_stream(),
-        media_type="multipart/x-mixed-replace; boundary=frame"
     )
 
 @app.websocket("/video_stream")
 async def stream_endpoint(ws: WebSocket):
     """Endpoint for Robot/Sim to send video frames."""
@@ -445,10 +369,15 @@ async def stream_endpoint(ws: WebSocket):
     try:
         while True:
             msg = await ws.receive()
-            if "bytes" in msg and msg["bytes"]:
-                state.update_frame(msg["bytes"])
     except Exception:
-        pass
 
 @app.websocket("/audio_stream")
 async def audio_endpoint(ws: WebSocket):
@@ -457,66 +386,69 @@ async def audio_endpoint(ws: WebSocket):
     print("[Audio] Stream Connected")
 
     async def robot_to_server():
-        """Robot mic -> server -> state.audio_queue (-> WebRTC -> browser)."""
         try:
             while True:
                 data = await ws.receive()
                 t = data.get("type")
-
                 if t == "websocket.disconnect":
                     print("[Audio] Disconnected (recv)")
                     break
 
                 if t == "websocket.receive":
                     if data.get("bytes"):
-                        # Audio FROM robot
-                        state.push_audio(data["bytes"])
                     elif data.get("text") == "ping":
                         print("[Audio] Received ping")
-                    else:
-                        print(f"[Audio] Received unknown message: {data}")
         except Exception as e:
             print(f"[Audio] robot_to_server error: {e}")
 
     async def server_to_robot():
-        """Server/operator audio -> robot speaker via WebSocket."""
         loop = asyncio.get_running_loop()
         try:
             while True:
                 chunk: bytes = await loop.run_in_executor(
                     None, state.get_audio_to_robot_blocking
                 )
-                if chunk is None:
-                    continue
-                await ws.send_bytes(chunk)
         except Exception as e:
             print(f"[Audio] server_to_robot error: {e}")
 
     try:
         await asyncio.gather(robot_to_server(), server_to_robot())
     finally:
         print("[Audio] Stream Closed")
 
 
-# --- 5. Gradio Interface ---
 
 class RobotAudioHandler(StreamHandler):
     """
     FastRTC handler that connects browser WebRTC audio to the robot.
 
-    - receive(): audio from browser mic -> state.audio_to_robot_queue (then /audio_stream sends it to robot)
-    - emit(): audio from state.audio_queue (filled by /audio_stream robot_to_server) -> browser playback
     """
 
     def __init__(self) -> None:
-        super().__init__(input_sample_rate=AUDIO_SAMPLE_RATE, output_sample_rate=AUDIO_SAMPLE_RATE)
 
-    def receive(self, frame: tuple[int, np.ndarray]) -> None:
-        """Called whenever the browser sends audio."""
         if frame is None:
             return
 
-        sr, array = frame
         if array is None:
             return
@@ -526,7 +458,6 @@ class RobotAudioHandler(StreamHandler):
         if arr.ndim > 1:
             arr = arr[0]
 
-        # Convert to int16 and then to bytes for the robot
        if arr.dtype != np.int16:
             if np.issubdtype(arr.dtype, np.floating):
                 arr = np.clip(arr, -1.0, 1.0)
@@ -537,382 +468,579 @@ class RobotAudioHandler(StreamHandler):
         state.push_audio_to_robot(arr.tobytes())
 
     def emit(self):
-        """
-        Called repeatedly by FastRTC to get audio to send to the browser.
-
-        Should return (sample_rate, np.ndarray[int16]) or None.
-        """
         try:
             sample_rate, frame_bytes = state.audio_queue.get(timeout=0.5)
             audio = np.frombuffer(frame_bytes, dtype=np.int16).reshape(1, -1)
             return sample_rate, audio
         except queue.Empty:
-            # No audio right now, tell FastRTC to skip sending
             return None
 
     def copy(self) -> "RobotAudioHandler":
-        """
-        FastRTC will call this when it needs a new handler for a new session.
-        The handler itself is stateless; it always looks at GlobalState.
-        """
         return RobotAudioHandler()
 
     def shutdown(self) -> None:
-        """Called on session shutdown. Nothing to clean up for now."""
         pass
 
     def start_up(self) -> None:
-        """Called on session startup. Nothing special to do."""
         pass
 
 
-def webrtc_audio_generator():
-    """
-    Generator for FastRTC.
-    """
-    # Clear old data to start fresh
-    with state.audio_queue.mutex:
-        state.audio_queue.queue.clear()
 
-    # OPTIMIZATION: Reduce target samples.
-    # 4096 samples @ 16kHz is 256ms of latency built-in!
-    # Try 1024 (64ms) or 512 (32ms) for lower latency.
-    TARGET_SAMPLES = 1024
-    byte_buffer = bytearray()
 
-    while True:
-        try:
-            # Wait up to 1 second for data. If no data, loop again.
-            # Do NOT use a short timeout combined with silence generation.
-            sample_rate, chunk_bytes = state.audio_queue.get(timeout=1.0)
-            if chunk_bytes:
-                byte_buffer.extend(chunk_bytes)
-        except queue.Empty:
-            # If we really have no data for a long time, just continue waiting.
-            # Do NOT yield silence here.
-            continue
 
-        # Only yield when we have enough data
-        while len(byte_buffer) >= TARGET_SAMPLES * 2:  # int16 = 2 bytes
-            read_size = TARGET_SAMPLES * 2
-            out_bytes = byte_buffer[:read_size]
-            byte_buffer = byte_buffer[read_size:]
 
-            audio_int16 = np.frombuffer(out_bytes, dtype=np.int16)
-            audio_int16 = audio_int16.reshape(1, -1)
 
-            yield (AUDIO_SAMPLE_RATE, audio_int16)
 
 
-def handle_operator_audio(sr: int, audio: np.ndarray):
-    """
-    Called continuously by FastRTC when the browser sends mic audio.
 
-    `audio` is expected to be shape (channels, samples) or (samples,)
-    with dtype int16 or float32, depending on FastRTC config.
-    """
-    if audio is None:
-        return
-
-    arr = np.asarray(audio)
-    # Ensure mono and int16
-    if arr.ndim > 1:
-        arr = arr[0]  # take first channel
-
-    if arr.dtype != np.int16:
-        # For float32 in [-1, 1]
-        if np.issubdtype(arr.dtype, np.floating):
-            arr = np.clip(arr, -1.0, 1.0)
-            arr = (arr * 32767.0).astype(np.int16)
-        else:
-            arr = arr.astype(np.int16)
 
-    state.push_audio_to_robot(arr.tobytes())
-    # No UI output
-    return
 
 
-def webrtc_video_generator():
-    """
-    Generator for FastRTC WebRTC (mode='receive', modality='video').
-    It reads JPEG bytes from state.latest_frame_bytes, decodes them with OpenCV,
-    and yields HxWx3 uint8 frames as expected by FastRTC.
-    """
-    last_ts = 0.0
-    frame = state.black_frame.copy()
 
-    while True:
-        with state.frame_lock:
-            ts = state.latest_frame_ts
-            frame_bytes = state.latest_frame_bytes
 
-        if ts > last_ts and frame_bytes:
-            last_ts = ts
-            np_bytes = np.frombuffer(frame_bytes, dtype=np.uint8)
-            frame = cv2.imdecode(np_bytes, cv2.IMREAD_COLOR)
-            if frame is None:
-                frame = state.black_frame.copy()
-        # Shape (H, W, 3), dtype uint8
-        yield frame
 
 
-NUDGE_POS = 5.0  # mm or arbitrary units
-NUDGE_HEIGHT = 5.0  # z
-NUDGE_ANGLE = 5.0  # degrees
-NUDGE_BODY = 0.3  # degrees for body_yaw
 
-def move_w():
-    """
-    W: Move "forward" (e.g. towards positive y or z depending on your convention).
-    Here: we'll go +z (raise head) as an example.
-    """
-    mov = state.update_pose(dpitch=-NUDGE_HEIGHT)
-    return send_pose_to_robot(mov, "W (forward/up)")
 
-def move_s():
-    mov = state.update_pose(dpitch=NUDGE_HEIGHT)
-    return send_pose_to_robot(mov, "S (back/down)")
 
-def move_a():
-    """
-    A: turn left -> head yaw left + body yaw left.
-    """
-    mov = state.update_pose(dyaw=NUDGE_ANGLE*2)
-    return send_pose_to_robot(mov, "A (turn left)")
 
-def move_d():
-    """
-    D: turn right -> head yaw right + body yaw right.
-    """
-    mov = state.update_pose(dyaw=-NUDGE_ANGLE*2)
-    return send_pose_to_robot(mov, "D (turn right)")
 
-def move_q():
-    """
-    Q: tilt head up (pitch negative if you follow your earlier convention).
-    """
-    mov = state.update_pose(droll=-NUDGE_ANGLE)
-    return send_pose_to_robot(mov, "Q (tilt up)")
 
-def move_e():
-    """
-    E: tilt head down (pitch positive).
-    """
-    mov = state.update_pose(droll=NUDGE_ANGLE)
-    return send_pose_to_robot(mov, "E (tilt down)")
 
-def move_body_left():
-    mov = state.update_pose(dbody_yaw=NUDGE_BODY)
-    return send_pose_to_robot(mov, "Body Left (<)")
 
-def move_body_right():
-    mov = state.update_pose(dbody_yaw=-NUDGE_BODY)
-    return send_pose_to_robot(mov, "Body Right (>)")
 
-def center_pose():
-    mov = state.reset_pose()
-    return send_pose_to_robot(mov, "Reset pose")
 
 
-with gr.Blocks(title="Reachy Controller", theme=gr.themes.Soft()) as demo:
-
-    gr.Markdown("## 🤖 Reachy Mini Controller")
 
     with gr.Row():
-        # --- LEFT COLUMN: Controls ---
         with gr.Column(scale=1):
-            status_box = gr.Textbox(label="System Status", value=state.get_connection_status, every=2)
-            pose_box = gr.Textbox(
-                label="Current Pose",
-                value=state.get_pose_text,
-                every=0.5,
-                lines=8,
             )
-            with gr.Group():
-                gr.Markdown("### 🎧 Audio Listen")
-
-                # Start button for the WebRTC stream
-                listen_btn = gr.Button("🔊 Start Listening", variant="secondary")
-
-                # FastRTC WebRTC component in receive mode, audio only
                 robot_audio = WebRTC(
-                    label="Robot Audio",
                     modality="audio",
                     mode="send-receive",
-                    rtc_configuration=get_cloudflare_turn_credentials(),
-                    server_rtc_configuration=get_cloudflare_turn_credentials(ttl=360_000),
-                    full_screen=False
                 )
-
-                # Use the handler directly, like in the FastRTC docs
                 robot_audio.stream(
                     fn=RobotAudioHandler(),
                     inputs=[robot_audio],
                     outputs=[robot_audio],
-                    time_limit=60,
                 )
-
-
-            # with gr.Group():
-            #     gr.Markdown("### 🎮 Playback")
-            #     auto_play = gr.Checkbox(label="Auto-play", value=True)
-            #     speed = gr.Slider(0.5, 2.0, 1.0, label="Speed")
-
-            #     with gr.Row():
-            #         play_btn = gr.Button("▶️ Play", variant="primary")
-            #         stop_btn = gr.Button("⏹️ Stop")
-
-            #     with gr.Row():
-            #         clear_btn = gr.Button("🗑️ Clear")
-            #         undo_btn = gr.Button("Undo")
-
-            #     queue_display = gr.Textbox(label="Queue", value=manager.get_queue_text, lines=10)
-
-            # --- Live movement control ---
-            with gr.Group():
-                gr.Markdown("### 🕹️ Keyboard Control (WASD + QE)")
-
-                # These buttons will be triggered by keyboard events via JS
-                btn_forward = gr.Button("Look up (W)", elem_id="btn-forward")
-                btn_back = gr.Button("Look down (S)", elem_id="btn-back")
-                btn_left = gr.Button("Left (A)", elem_id="btn-left")
-                btn_right = gr.Button("Right (D)", elem_id="btn-right")
-                btn_tilt_up = gr.Button("Tilt left (Q)", elem_id="btn-tilt-up")
-                btn_tilt_down = gr.Button("Tilt right (E)", elem_id="btn-tilt-down")
-                btn_body_left = gr.Button("Body Left (J)", elem_id="btn-body-left")
-                btn_body_right = gr.Button("Body Right (L)", elem_id="btn-body-right")
-                btn_center = gr.Button("Center (H)", elem_id="btn-center")
-
-                # Each button updates the pose_box text
-                btn_forward.click(move_w, outputs=[pose_box])
-                btn_back.click(move_s, outputs=[pose_box])
-                btn_left.click(move_a, outputs=[pose_box])
-                btn_right.click(move_d, outputs=[pose_box])
-                btn_tilt_up.click(move_q, outputs=[pose_box])
-                btn_tilt_down.click(move_e, outputs=[pose_box])
-                btn_body_left.click(move_body_left, outputs=[pose_box])
-                btn_body_right.click(move_body_right, outputs=[pose_box])
-                btn_center.click(center_pose, outputs=[pose_box])
-
-        # --- RIGHT COLUMN: View ---
-        with gr.Column(scale=2):
             robot_video = WebRTC(
-                label="Robot Video",
                 modality="video",
                 mode="receive",
-                rtc_configuration=get_cloudflare_turn_credentials(),
-                server_rtc_configuration=get_cloudflare_turn_credentials(ttl=360_000)
             )
             robot_video.stream(
-                fn=lambda: webrtc_video_generator(),
                 inputs=[],
                 outputs=[robot_video],
                 trigger=listen_btn.click,
             )
-        # html_code = """
-        # <html>
-        # <body>
-        # <img src="/video_feed" style="width: 100%; max-width: 1080px; border-radius: 8px;">
-        # </body>
-        # </html>
-        # """
-        # sim_view = gr.HTML(value=html_code, label="🎬 Robot Simulation")
-
-        # # --- Movement Builders ---
-        # with gr.Tabs():
-        #     with gr.Tab("✨ Presets & Sequences"):
-        #         gr.Markdown("### Quick Actions")
-        #         with gr.Row(variant="panel"):
-        #             for name in PRESETS:
-        #                 btn = gr.Button(name, size="sm")
-        #                 btn.click(manager.add_preset, inputs=[gr.State(name)], outputs=[queue_display, status_box])
-
-        #         gr.Markdown("### Sequences")
-        #         with gr.Row():
-        #             for seq in SEQUENCES:
-        #                 btn = gr.Button(f"🎬 {seq}", size="sm")
-        #                 btn.click(manager.add_sequence, inputs=[gr.State(seq)], outputs=[queue_display, status_box])
-
-        #     with gr.Tab("🛠️ Custom Move"):
-        #         with gr.Row():
-        #             c_x = gr.Slider(-50, 50, 0, label="X")
-        #             c_y = gr.Slider(-50, 50, 0, label="Y")
-        #             c_z = gr.Slider(-20, 50, 0, label="Z")
-        #         with gr.Row():
-        #             c_r = gr.Slider(-30, 30, 0, label="Roll")
-        #             c_p = gr.Slider(-30, 30, 0, label="Pitch")
-        #             c_y_aw = gr.Slider(-45, 45, 0, label="Yaw")
-        #         with gr.Row():
-        #             c_la = gr.Slider(-180, 180, 0, label="Left Ant")
-        #             c_ra = gr.Slider(-180, 180, 0, label="Right Ant")
-
-        #         c_dur = gr.Slider(0.1, 5.0, 1.0, label="Duration")
-        #         c_add = gr.Button("➕ Add Custom Move", variant="primary")
-
-        #         def _add_custom(x,y,z,r,p,yw,la,ra,d):
-        #             m = Movement("Custom", x,y,z,r,p,yw,la,ra,d)
-        #             return manager.add_movement(m)
-
-        #         c_add.click(_add_custom,
-        #                     inputs=[c_x, c_y, c_z, c_r, c_p, c_y_aw, c_la, c_ra, c_dur],
-        #                     outputs=[queue_display, status_box])
-
-    # --- Event Wiring ---
-    # auto_play.change(lambda x: setattr(manager, 'auto_play', x), inputs=[auto_play])
-    # play_btn.click(manager.play_queue, inputs=[speed], outputs=[queue_display, status_box])
-    # stop_btn.click(manager.stop_playback, outputs=[queue_display, status_box])
-    # clear_btn.click(manager.clear_queue, outputs=[queue_display, status_box])
-    # undo_btn.click(manager.remove_last, outputs=[queue_display, status_box])
-
-    demo.load(
-        None,
-        None,
-        None,
-        js="""
-        () => {
-            const keyMap = {
-                'w': 'btn-forward',
-                's': 'btn-back',
-                'a': 'btn-left',
-                'd': 'btn-right',
-                'q': 'btn-tilt-up',
-                'e': 'btn-tilt-down',
-                'h': 'btn-center',
-                'j': 'btn-body-left',
-                'l': 'btn-body-right',
-            };
-
-            let lastPressed = {};
-            const REPEAT_MS = 120; // minimum time between repeated presses
-
-            document.addEventListener('keydown', (ev) => {
-                const key = ev.key.toLowerCase();
-                const id = keyMap[key];
-                if (!id) return;
-
-                const now = Date.now();
-                if (lastPressed[key] && now - lastPressed[key] < REPEAT_MS) {
-                    return; // simple debounce
-                }
-                lastPressed[key] = now;
-
-                // Prevent page scrolling with space, etc
-                ev.preventDefault();
-
-                const btn = document.getElementById(id);
-                if (btn) {
-                    btn.click();
-                }
-            });
-
-            console.log('Keyboard control ready: WASD for x/y, Q/E for pitch, J/L for body yaw, H for center');
-        }
-        """,
-    )
 
-# --- 6. Mount & Run ---
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
     print("🚀 Server starting on http://0.0.0.0:7860")
-    print("ℹ️ Point your Robot/Sim to: ws://<YOUR_PC_IP>:7860/robot")
     uvicorn.run(app, host="0.0.0.0", port=7860, proxy_headers=True, forwarded_allow_ips="*")

 """
+Reachy Mini Controller
 A centralized server that listens for Robot connections and hosts a Gradio control interface.
 """
 
 import asyncio
 import threading
 import time
 import queue
 from dataclasses import dataclass
+from typing import Optional, Tuple
 
 import cv2
 import gradio as gr
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 from fastapi.responses import StreamingResponse
 import uvicorn
+from fastrtc import WebRTC, StreamHandler, get_cloudflare_turn_credentials
 
+from reachy_mini.utils import create_head_pose
+
+# -------------------------------------------------------------------
+# 1. Configuration
+# -------------------------------------------------------------------
 
 AUDIO_SAMPLE_RATE = 16000  # respeaker samplerate
 
+# Audio queue configuration
+MAX_AUDIO_QUEUE_SIZE = 2
+
+# Movement step sizes
+NUDGE_ANGLE = 5.0  # degrees for head roll / yaw
+NUDGE_BODY = 0.3  # degrees for body_yaw
+NUDGE_PITCH = 5.0  # degrees for pitch
+
+# Video loop timing
+FRAME_SLEEP_S = 0.02
+
+# TURN config
+TURN_TTL_SERVER_MS = 360_000
+
+USE_VIDEO_WEBRTC = True
+
+turn_credentials = None  # get_cloudflare_turn_credentials()
+server_turn_credentials = None  # get_cloudflare_turn_credentials(ttl=TURN_TTL_SERVER_MS)
 
 
+# -------------------------------------------------------------------
+# 2. Data Models
+# -------------------------------------------------------------------
+
 @dataclass
 class Movement:
     name: str
 
     right_antenna: Optional[float] = None
     duration: float = 1.0
 
+# -------------------------------------------------------------------
+# 3. Global State
+# -------------------------------------------------------------------
 
 class GlobalState:
     """
     Singleton-style class to manage shared state between FastAPI (WebSockets)
+    and Gradio (UI).
     """
     def __init__(self):
         # Connection handles
         self.robot_ws: Optional[WebSocket] = None
         self.robot_loop: Optional[asyncio.AbstractEventLoop] = None
+
         # Video Stream Data
         self.frame_lock = threading.Lock()
         self.black_frame = np.zeros((640, 640, 3), dtype=np.uint8)
+        _, buffer = cv2.imencode(".jpg", self.black_frame)
         self.latest_frame_bytes = buffer.tobytes()
         self.latest_frame_ts = time.time()
 
         # Audio from robot -> browser
+        # Queue of (sample_rate: int, audio_bytes: bytes)
+        self.audio_queue: "queue.Queue[Tuple[int, bytes]]" = queue.Queue()
 
+        # Audio from operator -> robot
+        self.audio_to_robot_queue: "queue.Queue[bytes]" = queue.Queue()
 
+        # Live pose state
         self.pose_lock = threading.Lock()
         self.current_pose = Movement(
             name="Current",
+            x=0,
+            y=0,
+            z=0,
+            roll=0,
+            pitch=0,
+            yaw=0,
            body_yaw=0,
             left_antenna=0,
             right_antenna=0,
             duration=0.2,
         )
 
+    # --- Connection management ---
 
+    def set_robot_connection(self, ws: WebSocket, loop: asyncio.AbstractEventLoop) -> None:
         self.robot_ws = ws
         self.robot_loop = loop
 
+    def clear_robot_connection(self) -> None:
+        self.robot_ws = None
+        self.robot_loop = None
+
+    # --- Video ---
+
+    def update_frame(self, frame_bytes: bytes) -> None:
         with self.frame_lock:
             self.latest_frame_bytes = frame_bytes
             self.latest_frame_ts = time.time()
 
+    # --- Audio queues ---
 
+    @staticmethod
+    def _push_bounded(q: queue.Queue, item, max_size: int, description: str) -> None:
+        while q.qsize() >= max_size:
             try:
+                dropped = q.get_nowait()
+                del dropped
+                print(f"Dropping oldest audio {description}, queue size is {q.qsize()}")
             except queue.Empty:
                 break
+        q.put(item)
+
+    def push_audio_from_robot(self, audio_bytes: bytes) -> None:
+        self._push_bounded(
+            self.audio_queue,
+            (AUDIO_SAMPLE_RATE, audio_bytes),
+            MAX_AUDIO_QUEUE_SIZE,
+            "FROM robot",
+        )
 
+    def push_audio_to_robot(self, audio_bytes: bytes) -> None:
+        self._push_bounded(
+            self.audio_to_robot_queue,
+            audio_bytes,
+            MAX_AUDIO_QUEUE_SIZE,
+            "TO robot",
+        )
 
     def get_audio_to_robot_blocking(self) -> bytes:
+        try:
+            return self.audio_to_robot_queue.get(timeout=0.2)
+        except queue.Empty:
+            return None
+
+    # --- Status ---
 
     def get_connection_status(self) -> str:
+        return "✅ Robot Connected" if self.robot_ws else "🔴 Waiting for Robot..."
+
+    # --- Pose management ---
 
     def update_pose(
         self,
 
         dyaw: float = 0,
         dbody_yaw: float = 0,
     ) -> Movement:
         with self.pose_lock:
             p = self.current_pose
 
             duration=0.4,
         )
 
+        # Clamp pose values
         new.pitch = float(np.clip(new.pitch, -30, 30))
         new.yaw = float(np.clip(new.yaw, -180, 180))
         new.roll = float(np.clip(new.roll, -40, 40))
 
         return new
 
     def reset_pose(self) -> Movement:
         with self.pose_lock:
             self.current_pose = Movement(
                 name="Current",
+                x=0,
+                y=0,
+                z=0,
+                roll=0,
+                pitch=0,
+                yaw=0,
                 body_yaw=0,
                 left_antenna=0,
                 right_antenna=0,
 
         return self.current_pose
 
     def get_pose_text(self) -> str:
         with self.pose_lock:
             p = self.current_pose
             return (
+                "Head position:\n"
                 f"  x={p.x:.1f}, y={p.y:.1f}, z={p.z:.1f}\n"
                 f"  roll={p.roll:.1f}, pitch={p.pitch:.1f}, yaw={p.yaw:.1f}\n"
+                "Body:\n"
                 f"  body_yaw={p.body_yaw:.1f}"
             )
 
+
 state = GlobalState()
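For reference, a minimal sketch (not part of the commit) of the drop-oldest policy that `_push_bounded` implements — with `MAX_AUDIO_QUEUE_SIZE = 2`, a third push evicts the oldest chunk so robot-to-browser latency stays bounded instead of growing:

    import queue

    q = queue.Queue()
    for chunk in (b"a", b"b", b"c"):
        while q.qsize() >= 2:       # same policy as GlobalState._push_bounded
            try:
                q.get_nowait()      # drop the oldest item instead of blocking
            except queue.Empty:
                break
        q.put(chunk)

    print(list(q.queue))  # [b'b', b'c'] -- b'a' was dropped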
 
+
+# -------------------------------------------------------------------
+# 4. Robot commands
+# -------------------------------------------------------------------
+
 def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
     if not (state.robot_ws and state.robot_loop):
         return state.get_pose_text(), "⚠️ Robot not connected"
 
     pose = create_head_pose(
+        x=mov.x,
+        y=mov.y,
+        z=mov.z,
+        roll=mov.roll,
+        pitch=mov.pitch,
+        yaw=mov.yaw,
+        degrees=True,
+        mm=True,
    )
 
     payload = {
 
     return state.get_pose_text(), f"✅ {msg}"
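The payload body is unchanged and therefore elided by the diff viewer. Judging from the removed `MovementManager._worker` above, the message sent over the /robot socket plausibly has this shape (sketch; field names taken from that removed code, duration/antennas handling assumed):

    payload = {
        "type": "movement",
        "movement": {
            "head": pose.tolist(),      # head pose from create_head_pose
            "body_yaw": mov.body_yaw,
            "duration": mov.duration,
            # optional: "antennas": [right_rad, left_rad], in radians
        },
    }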
 
+# -------------------------------------------------------------------
+# 5. Video streaming helpers
+# -------------------------------------------------------------------
 
+def generate_mjpeg_stream():
+    last_timestamp = 0.0
+    while True:
+        with state.frame_lock:
+            current_bytes = state.latest_frame_bytes
+            current_timestamp = state.latest_frame_ts
+
+        if current_timestamp > last_timestamp and current_bytes is not None:
+            last_timestamp = current_timestamp
+            yield (
+                b"--frame\r\n"
+                b"Content-Type: image/jpeg\r\n\r\n" + current_bytes + b"\r\n"
+            )
+        else:
+            time.sleep(FRAME_SLEEP_S)
+            continue
+
+        time.sleep(FRAME_SLEEP_S)
+
+
+def webrtc_video_generator():
+    """
+    Generator for FastRTC WebRTC (mode='receive', modality='video').
+    """
+    last_ts = 0.0
+    frame = state.black_frame.copy()
+
+    while True:
+        with state.frame_lock:
+            ts = state.latest_frame_ts
+            frame_bytes = state.latest_frame_bytes
+
+        if ts > last_ts and frame_bytes:
+            last_ts = ts
+            np_bytes = np.frombuffer(frame_bytes, dtype=np.uint8)
+            decoded = cv2.imdecode(np_bytes, cv2.IMREAD_COLOR)
+            if decoded is not None:
+                frame = decoded
             else:
+                frame = state.black_frame.copy()
+
+        yield frame
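The MJPEG fallback can be checked without a browser; OpenCV's FFmpeg backend can usually read a multipart MJPEG stream directly (sketch, assuming the server runs locally on port 7860 — support depends on the OpenCV build):

    import cv2

    cap = cv2.VideoCapture("http://localhost:7860/video_feed")
    ok, frame = cap.read()  # frame is an HxWx3 BGR array when ok is True
    print(ok, None if frame is None else frame.shape)
    cap.release()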
 
 
 
 
+# -------------------------------------------------------------------
+# 6. FastAPI endpoints
+# -------------------------------------------------------------------
 
 app = FastAPI()
 
+
 @app.websocket("/robot")
 async def robot_endpoint(ws: WebSocket):
+    """Endpoint for the Robot to connect to (control channel)."""
     await ws.accept()
     state.set_robot_connection(ws, asyncio.get_running_loop())
+    print("[System] Robot Connected")
+
     try:
         while True:
+            msg = await ws.receive()
+            if msg.get("type") == "websocket.disconnect":
                 break
     except (WebSocketDisconnect, Exception):
         print("[System] Robot Disconnected")
     finally:
+        state.clear_robot_connection()
+
 
 @app.get("/video_feed")
 def video_feed():
     return StreamingResponse(
+        generate_mjpeg_stream(),
+        media_type="multipart/x-mixed-replace; boundary=frame",
     )
 
+
 @app.websocket("/video_stream")
 async def stream_endpoint(ws: WebSocket):
     """Endpoint for Robot/Sim to send video frames."""
 
     try:
         while True:
             msg = await ws.receive()
+            data = msg.get("bytes")
+            if data:
+                state.update_frame(data)
+    except asyncio.CancelledError:
+        print("[Video] stream_endpoint cancelled")
     except Exception:
+        print("[Video] stream_endpoint closed")
+    finally:
+        print("[Video] stream_endpoint closed")
 
 @app.websocket("/audio_stream")
 async def audio_endpoint(ws: WebSocket):
 
     print("[Audio] Stream Connected")
 
     async def robot_to_server():
         try:
             while True:
                 data = await ws.receive()
                 t = data.get("type")
                 if t == "websocket.disconnect":
                     print("[Audio] Disconnected (recv)")
                     break
 
                 if t == "websocket.receive":
                     if data.get("bytes"):
+                        state.push_audio_from_robot(data["bytes"])
                     elif data.get("text") == "ping":
                         print("[Audio] Received ping")
+        except asyncio.CancelledError:
+            print("[Audio] robot_to_server cancelled")
         except Exception as e:
             print(f"[Audio] robot_to_server error: {e}")
 
     async def server_to_robot():
         loop = asyncio.get_running_loop()
         try:
             while True:
                 chunk: bytes = await loop.run_in_executor(
                     None, state.get_audio_to_robot_blocking
                 )
+                if chunk is not None:
+                    await ws.send_bytes(chunk)
+        except asyncio.CancelledError:
+            print("[Audio] server_to_robot cancelled")
         except Exception as e:
             print(f"[Audio] server_to_robot error: {e}")
 
     try:
         await asyncio.gather(robot_to_server(), server_to_robot())
+    except asyncio.CancelledError:
+        print("[Audio] audio_endpoint cancelled")
     finally:
         print("[Audio] Stream Closed")
 
 
+# -------------------------------------------------------------------
+# 7. FastRTC audio handler
+# -------------------------------------------------------------------
 
 class RobotAudioHandler(StreamHandler):
     """
     FastRTC handler that connects browser WebRTC audio to the robot.
 
+    - receive(): browser mic -> state.audio_to_robot_queue -> /audio_stream -> robot
+    - emit(): state.audio_queue (robot) -> browser playback
     """
 
     def __init__(self) -> None:
+        super().__init__(
+            input_sample_rate=AUDIO_SAMPLE_RATE,
+            output_sample_rate=AUDIO_SAMPLE_RATE,
+        )
 
+    def receive(self, frame: Tuple[int, np.ndarray]) -> None:
         if frame is None:
             return
 
+        sample_rate, array = frame
         if array is None:
             return
 
         if arr.ndim > 1:
             arr = arr[0]
 
         if arr.dtype != np.int16:
             if np.issubdtype(arr.dtype, np.floating):
                 arr = np.clip(arr, -1.0, 1.0)
 
         state.push_audio_to_robot(arr.tobytes())
 
     def emit(self):
         try:
             sample_rate, frame_bytes = state.audio_queue.get(timeout=0.5)
             audio = np.frombuffer(frame_bytes, dtype=np.int16).reshape(1, -1)
             return sample_rate, audio
         except queue.Empty:
             return None
 
     def copy(self) -> "RobotAudioHandler":
         return RobotAudioHandler()
 
     def shutdown(self) -> None:
         pass
 
     def start_up(self) -> None:
         pass
 
 
+# -------------------------------------------------------------------
+# 8. Movement UI helpers
+# -------------------------------------------------------------------
 
+def get_pose_string():
+    """Returns the pose in a format the JS can parse: pitch:X,yaw:Y,roll:Z,body:B"""
+    with state.pose_lock:
+        p = state.current_pose
+        return f"pitch:{p.pitch:.1f},yaw:{p.yaw:.1f},roll:{p.roll:.1f},body:{p.body_yaw:.1f}"
 
+def nudge_pose(dpitch=0, dyaw=0, droll=0, dbody_yaw=0, label="Move"):
+    """Apply a delta to the current pose, send it to the robot, and return the pose string."""
+    mov = state.update_pose(
+        dpitch=dpitch,
+        dyaw=dyaw,
+        droll=droll,
+        dbody_yaw=dbody_yaw,
+    )
+    send_pose_to_robot(mov, label)
+    return get_pose_string()
 
+def center_pose():
+    """Reset to the neutral pose and return the pose string."""
+    mov = state.reset_pose()
+    send_pose_to_robot(mov, "Reset pose")
+    return get_pose_string()
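The pose string is the contract between `get_pose_string` and the gauge-updating JS below; a quick round-trip check in Python (sketch, mirroring the JS regex):

    import re

    text = "pitch:-5.0,yaw:10.0,roll:0.0,body:0.3"
    m = re.match(r"pitch:([\d.-]+),yaw:([\d.-]+),roll:([\d.-]+),body:([\d.-]+)", text)
    pitch, yaw, roll, body = (float(g) for g in m.groups())
    assert (pitch, yaw, roll, body) == (-5.0, 10.0, 0.0, 0.3)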
 
 
+# -------------------------------------------------------------------
+# 9. Gradio UI
+# -------------------------------------------------------------------
 
+CUSTOM_CSS = """
+/* Dark theme overrides */
+.gradio-container {
+    background: linear-gradient(135deg, #0a0a0f 0%, #121218 100%) !important;
+    min-height: 100vh;
+}
+.dark {
+    --background-fill-primary: #12121a !important;
+    --background-fill-secondary: #1a1a24 !important;
+    --border-color-primary: #2a2a3a !important;
+    --text-color-subdued: #888 !important;
+}
+
+/* Header styling */
+#header-row {
+    background: transparent !important;
+    border: none !important;
+    margin-bottom: 1rem;
+    display: flex !important;
+    justify-content: space-between !important;
+    align-items: center !important;
+}
+#app-title {
+    font-size: 1.5rem !important;
+    font-weight: 600 !important;
+    background: linear-gradient(90deg, #fff, #888) !important;
+    -webkit-background-clip: text !important;
+    -webkit-text-fill-color: transparent !important;
+    border: none !important;
+    padding: 0 !important;
+    margin: 0 !important;
+}
+
+/* Status badge */
+#status-box {
+    flex-shrink: 0 !important;
+    width: auto !important;
+    max-width: 200px !important;
+    min-width: 160px !important;
+    background: rgba(16, 185, 129, 0.15) !important;
+    border: 1px solid rgba(16, 185, 129, 0.4) !important;
+    border-radius: 9999px !important;
+    padding: 0.4rem 1rem !important;
+    font-size: 0.875rem !important;
+}
+#status-box textarea {
+    background: transparent !important;
+    border: none !important;
+    color: #10b981 !important;
+    text-align: center !important;
+    font-weight: 500 !important;
+    padding: 0 !important;
+    min-height: unset !important;
+    height: auto !important;
+    line-height: 1.4 !important;
+}
+
+/* Video panel */
+#video-column {
+    background: #0f0f14 !important;
+    border-radius: 1rem !important;
+    border: 1px solid #2a2a3a !important;
+    overflow: hidden !important;
+    min-height: 500px !important;
+}
+#robot-video {
+    border-radius: 0.75rem !important;
+    overflow: hidden !important;
+}
+
+/* Control panel cards */
+.control-card {
+    background: rgba(26, 26, 36, 0.8) !important;
+    border: 1px solid #2a2a3a !important;
+    border-radius: 0.75rem !important;
+    padding: 1rem !important;
+}
+
+/* Audio section */
+#audio-section {
+    background: rgba(26, 26, 36, 0.8) !important;
+    border: 1px solid #2a2a3a !important;
+    border-radius: 0.75rem !important;
+}
+#listen-btn {
+    background: rgba(139, 92, 246, 0.2) !important;
+    border: 1px solid rgba(139, 92, 246, 0.3) !important;
+    color: #a78bfa !important;
+    border-radius: 0.5rem !important;
+    transition: all 0.2s !important;
+}
+#listen-btn:hover {
+    background: rgba(139, 92, 246, 0.3) !important;
+}
+
+/* Hide the default keyboard buttons */
+#keyboard-buttons {
+    display: none !important;
+}
+
+/* Quick action buttons */
+.quick-btn {
+    background: #1f1f2e !important;
+    border: 1px solid #2a2a3a !important;
+    border-radius: 0.5rem !important;
+    padding: 0.5rem !important;
+    font-size: 0.75rem !important;
+    transition: all 0.2s !important;
+}
+.quick-btn:hover {
+    background: #2a2a3a !important;
+}
+
+/* Keyboard visualization container */
+#keyboard-viz {
+    position: fixed;
+    bottom: 3.5rem;
+    right: 2rem;
+    z-index: 1000;
+    pointer-events: none;
+}
+
+/* Gauges container */
+#gauges-viz {
+    position: fixed;
+    bottom: 3.5rem;
+    left: 2rem;
+    z-index: 1000;
+    pointer-events: none;
+}
+
+/* Hide Gradio footer or make room for it */
+footer {
+    opacity: 0.5;
+}
+
+/* Hidden pose state (keep in DOM for JS) */
+#pose-state {
+    position: absolute !important;
+    opacity: 0 !important;
+    pointer-events: none !important;
+    height: 0 !important;
+    overflow: hidden !important;
+}
+"""
 
+KEYBOARD_VIZ_HTML = """
+<div id="keyboard-viz">
+  <div style="
+      background: rgba(0,0,0,0.75);
+      backdrop-filter: blur(12px);
+      border: 1px solid rgba(255,255,255,0.15);
+      border-radius: 16px;
+      padding: 16px;
+      display: flex;
+      flex-direction: column;
+      gap: 6px;
+      align-items: center;
+  ">
+    <div style="display: flex; gap: 6px;">
+      <div class="key" data-key="q">Q</div>
+      <div class="key" data-key="w">W</div>
+      <div class="key" data-key="e">E</div>
+    </div>
+    <div style="display: flex; gap: 6px;">
+      <div class="key" data-key="a">A</div>
+      <div class="key" data-key="s">S</div>
+      <div class="key" data-key="d">D</div>
+    </div>
+    <div style="display: flex; gap: 6px; margin-top: 6px;">
+      <div class="key" data-key="j">J</div>
+      <div class="key" data-key="h">H</div>
+      <div class="key" data-key="l">L</div>
+    </div>
+  </div>
+</div>
+
+<style>
+.key {
+    width: 48px;
+    height: 48px;
+    background: linear-gradient(180deg, #3a3a4a 0%, #2a2a3a 100%);
+    border: 1px solid #4a4a5a;
+    border-radius: 8px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    font-size: 14px;
+    font-weight: 600;
+    color: #ccc;
+    font-family: system-ui, sans-serif;
+    transition: all 0.1s;
+}
+.key.active {
+    background: linear-gradient(180deg, #8b5cf6 0%, #7c3aed 100%);
+    border-color: #a78bfa;
+    color: white;
+    box-shadow: 0 0 16px rgba(139, 92, 246, 0.6);
+    transform: scale(0.95);
+}
+</style>
+"""
 
+GAUGES_HTML = """
+<div id="gauges-viz">
+  <div style="
+      background: rgba(0,0,0,0.75);
+      backdrop-filter: blur(12px);
+      border: 1px solid rgba(255,255,255,0.15);
+      border-radius: 16px;
+      padding: 16px 20px;
+      display: flex;
+      gap: 24px;
+  ">
+    <div class="gauge-container">
+      <svg width="72" height="48" viewBox="0 0 72 48">
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#8b5cf6" stroke-width="5" stroke-linecap="round"
+              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="pitch"/>
+        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
+              class="gauge-needle" data-gauge="pitch" transform="rotate(0, 36, 42)"/>
+        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
+      </svg>
+      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Pitch</div>
+      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="pitch">0.0°</div>
+    </div>
+    <div class="gauge-container">
+      <svg width="72" height="48" viewBox="0 0 72 48">
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#06b6d4" stroke-width="5" stroke-linecap="round"
+              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="yaw"/>
+        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
+              class="gauge-needle" data-gauge="yaw" transform="rotate(0, 36, 42)"/>
+        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
+      </svg>
+      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Yaw</div>
+      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="yaw">0.0°</div>
+    </div>
+    <div class="gauge-container">
+      <svg width="72" height="48" viewBox="0 0 72 48">
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#f59e0b" stroke-width="5" stroke-linecap="round"
+              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="roll"/>
+        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
+              class="gauge-needle" data-gauge="roll" transform="rotate(0, 36, 42)"/>
+        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
+      </svg>
+      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Roll</div>
+      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="roll">0.0°</div>
+    </div>
+    <div style="width: 1px; background: #3a3a4a; margin: 0 4px;"></div>
+    <div class="gauge-container">
+      <svg width="72" height="48" viewBox="0 0 72 48">
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#2a2a3a" stroke-width="5" stroke-linecap="round"/>
+        <path d="M 6 42 A 30 30 0 0 1 66 42" fill="none" stroke="#ec4899" stroke-width="5" stroke-linecap="round"
+              stroke-dasharray="94.2" stroke-dashoffset="47.1" class="gauge-arc" data-gauge="body"/>
+        <line x1="36" y1="42" x2="36" y2="18" stroke="white" stroke-width="3" stroke-linecap="round"
+              class="gauge-needle" data-gauge="body" transform="rotate(0, 36, 42)"/>
+        <circle cx="36" cy="42" r="6" fill="#1a1a24" stroke="#3a3a4a" stroke-width="2"/>
+      </svg>
+      <div style="text-align: center; font-family: system-ui; font-size: 12px; color: #888; margin-top: 4px;">Body</div>
+      <div style="text-align: center; font-family: monospace; font-size: 14px; color: #fff; font-weight: 500;" class="gauge-value" data-gauge="body">0.0°</div>
+    </div>
+  </div>
+</div>
+"""
 
+KEYBOARD_JS = """
+() => {
+    const keyMap = {
+        'w': 'w', 's': 's', 'a': 'a', 'd': 'd',
+        'q': 'q', 'e': 'e', 'h': 'h', 'j': 'j', 'l': 'l',
+    };
+    const btnMap = {
+        'w': 'btn-forward', 's': 'btn-back', 'a': 'btn-left', 'd': 'btn-right',
+        'q': 'btn-tilt-up', 'e': 'btn-tilt-down', 'h': 'btn-center',
+        'j': 'btn-body-left', 'l': 'btn-body-right',
+    };
+
+    let lastPressed = {};
+    const REPEAT_MS = 120;
+
+    document.addEventListener('keydown', (ev) => {
+        const key = ev.key.toLowerCase();
+        if (!keyMap[key]) return;
+
+        // Visual feedback
+        const keyEl = document.querySelector(`.key[data-key="${key}"]`);
+        if (keyEl) keyEl.classList.add('active');
+
+        // Rate limit and trigger button
+        const now = Date.now();
+        if (lastPressed[key] && now - lastPressed[key] < REPEAT_MS) return;
+        lastPressed[key] = now;
+        ev.preventDefault();
+
+        const btn = document.getElementById(btnMap[key]);
+        if (btn) btn.click();
+    });
+
+    document.addEventListener('keyup', (ev) => {
+        const key = ev.key.toLowerCase();
+        const keyEl = document.querySelector(`.key[data-key="${key}"]`);
+        if (keyEl) keyEl.classList.remove('active');
+    });
+
+    // Watch pose-state textbox for changes and update gauges
+    const updateGaugesFromState = () => {
+        const poseEl = document.querySelector('#pose-state textarea');
+        if (!poseEl) return;
+
+        const text = poseEl.value;
+        // Parse: "pitch:0.0,yaw:0.0,roll:0.0,body:0.0"
+        const match = text.match(/pitch:([\\d.-]+),yaw:([\\d.-]+),roll:([\\d.-]+),body:([\\d.-]+)/);
+        if (!match) return;
+
+        const pitch = parseFloat(match[1]);
+        const yaw = parseFloat(match[2]);
+        const roll = parseFloat(match[3]);
+        const body = parseFloat(match[4]);
+
+        const gauges = { pitch: [-30, 30], yaw: [-180, 180], roll: [-40, 40], body: [-3, 3] };
+        const values = { pitch, yaw, roll, body };
+
+        Object.entries(gauges).forEach(([name, [min, max]]) => {
+            const value = values[name];
+            const normalized = (value - min) / (max - min);
+            const angle = (normalized - 0.5) * 180;
+
+            const needle = document.querySelector(`.gauge-needle[data-gauge="${name}"]`);
+            if (needle) needle.setAttribute('transform', `rotate(${angle}, 36, 42)`);
+
+            const display = document.querySelector(`.gauge-value[data-gauge="${name}"]`);
+            if (display) display.textContent = value.toFixed(1) + '°';
+        });
+    };
+
+    // Poll for pose updates every 100ms
+    setInterval(updateGaugesFromState, 100);
+
+    // Update status box styling based on connection state
+    const updateStatusStyle = () => {
+        const statusBox = document.querySelector('#status-box');
+        if (!statusBox) return;
+        const textarea = statusBox.querySelector('textarea');
+        if (!textarea) return;
+
+        const isConnected = textarea.value.includes('Connected');
+        if (isConnected) {
+            statusBox.style.background = 'rgba(16, 185, 129, 0.15)';
+            statusBox.style.borderColor = 'rgba(16, 185, 129, 0.4)';
+            textarea.style.color = '#10b981';
+        } else {
+            statusBox.style.background = 'rgba(239, 68, 68, 0.15)';
+            statusBox.style.borderColor = 'rgba(239, 68, 68, 0.4)';
+            textarea.style.color = '#ef4444';
+        }
+    };
+    setInterval(updateStatusStyle, 500);
+
+    console.log('🎮 Keyboard controls ready');
+}
+"""
 
+# -------------------------------------------------------------------
+# Gradio UI with new styling
+# -------------------------------------------------------------------
+
+with gr.Blocks(
+    title="Reachy Controller",
+    theme=gr.themes.Base(
+        primary_hue="violet",
+        neutral_hue="slate",
+    ),
+    css=CUSTOM_CSS,
+) as demo:
 
+    # Header
+    with gr.Row(elem_id="header-row"):
+        gr.Markdown("## 🤖 Reachy Mini", elem_id="app-title")
+        status_box = gr.Textbox(
+            value=state.get_connection_status,
+            every=2,
+            show_label=False,
+            container=False,
+            elem_id="status-box",
+        )
+
     with gr.Row():
+        # Left column - Controls
         with gr.Column(scale=1):
+            # Hidden pose state textbox - polls pose for JS gauges
+            pose_state = gr.Textbox(
+                value=get_pose_string,
+                every=0.2,
+                show_label=False,
+                container=False,
+                elem_id="pose-state",
             )
+
+            # Audio section
+            with gr.Group(elem_id="audio-section"):
+                gr.Markdown("### 🎧 Audio")
+                listen_btn = gr.Button("🎤 Start Listening", elem_id="listen-btn")
                 robot_audio = WebRTC(
+                    label="",
                     modality="audio",
                     mode="send-receive",
+                    rtc_configuration=turn_credentials,
+                    server_rtc_configuration=server_turn_credentials,
+                    full_screen=False,
                 )
                 robot_audio.stream(
                     fn=RobotAudioHandler(),
                     inputs=[robot_audio],
                     outputs=[robot_audio],
                 )
+
+            # Quick actions
+            with gr.Group(elem_classes="control-card"):
+                gr.Markdown("### Quick Actions")
+                with gr.Row():
+                    btn_center_quick = gr.Button("🏠 Center", elem_classes="quick-btn")
+                    btn_look_up = gr.Button("👀 Look Up", elem_classes="quick-btn")
+                with gr.Row():
+                    btn_curious = gr.Button("🎭 Curious", elem_classes="quick-btn")
+                    btn_excited = gr.Button("🎉 Excited", elem_classes="quick-btn")
+
+            # Hidden keyboard buttons (still needed for JS clicks)
+            with gr.Group(elem_id="keyboard-buttons"):
+                btn_forward = gr.Button("W", elem_id="btn-forward")
+                btn_back = gr.Button("S", elem_id="btn-back")
+                btn_left = gr.Button("A", elem_id="btn-left")
+                btn_right = gr.Button("D", elem_id="btn-right")
+                btn_tilt_up = gr.Button("Q", elem_id="btn-tilt-up")
+                btn_tilt_down = gr.Button("E", elem_id="btn-tilt-down")
+                btn_body_left = gr.Button("J", elem_id="btn-body-left")
+                btn_body_right = gr.Button("L", elem_id="btn-body-right")
+                btn_center = gr.Button("H", elem_id="btn-center")
+
+            # Wire up hidden buttons - outputs required for Gradio to execute!
+            btn_forward.click(
+                lambda: nudge_pose(dpitch=-NUDGE_PITCH, label="W"),
+                outputs=[pose_state],
+            )
+            btn_back.click(
+                lambda: nudge_pose(dpitch=NUDGE_PITCH, label="S"),
+                outputs=[pose_state],
+            )
+            btn_left.click(
+                lambda: nudge_pose(dyaw=NUDGE_ANGLE * 2, label="A"),
+                outputs=[pose_state],
+            )
+            btn_right.click(
+                lambda: nudge_pose(dyaw=-NUDGE_ANGLE * 2, label="D"),
+                outputs=[pose_state],
+            )
+            btn_tilt_up.click(
+                lambda: nudge_pose(droll=-NUDGE_ANGLE, label="Q"),
+                outputs=[pose_state],
+            )
+            btn_tilt_down.click(
+                lambda: nudge_pose(droll=NUDGE_ANGLE, label="E"),
+                outputs=[pose_state],
+            )
+            btn_body_left.click(
+                lambda: nudge_pose(dbody_yaw=NUDGE_BODY, label="J"),
+                outputs=[pose_state],
+            )
+            btn_body_right.click(
+                lambda: nudge_pose(dbody_yaw=-NUDGE_BODY, label="L"),
+                outputs=[pose_state],
+            )
+            btn_center.click(center_pose, outputs=[pose_state])
+
+            # Wire up quick action buttons
+            btn_center_quick.click(center_pose, outputs=[pose_state])
+            btn_look_up.click(
+                lambda: nudge_pose(dpitch=-15, label="Look Up"),
+                outputs=[pose_state],
+            )
+            btn_curious.click(
+                lambda: nudge_pose(dpitch=-10, droll=15, label="Curious"),
+                outputs=[pose_state],
+            )
+            btn_excited.click(
+                lambda: nudge_pose(dpitch=-5, droll=-10, label="Excited"),
+                outputs=[pose_state],
+            )
+
+        # Right column - Video
+        with gr.Column(scale=2, elem_id="video-column"):
             robot_video = WebRTC(
+                label="",
                 modality="video",
                 mode="receive",
+                rtc_configuration=turn_credentials,
+                server_rtc_configuration=server_turn_credentials,
+                elem_id="robot-video",
             )
             robot_video.stream(
+                fn=webrtc_video_generator,
                 inputs=[],
                 outputs=[robot_video],
                 trigger=listen_btn.click,
             )
 
+    # Floating keyboard visualization
+    gr.HTML(KEYBOARD_VIZ_HTML)
+    gr.HTML(GAUGES_HTML)
+
+    # Load keyboard handler
+    demo.load(None, None, None, js=KEYBOARD_JS)
+
+# -------------------------------------------------------------------
+# 10. Mount & run
+# -------------------------------------------------------------------
+
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
     print("🚀 Server starting on http://0.0.0.0:7860")
+    print("ℹ️ Point your Robot/Sim to: ws://<YOUR_PC_IP>:7860/robot")
     uvicorn.run(app, host="0.0.0.0", port=7860, proxy_headers=True, forwarded_allow_ips="*")