andito HF Staff committed on
Commit
eed1295
·
verified ·
1 Parent(s): e7a0004

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -60
app.py CHANGED
@@ -1245,11 +1245,49 @@ KEYBOARD_JS = """
1245
  APP_JS = """
1246
  () => {
1247
  // ==========================================
1248
- // 1. BI-DIRECTIONAL AUDIO (Talk & Listen)
1249
  // ==========================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1250
  window.startAudioPlayer = async function() {
1251
  const btn = document.getElementById('start-stream-btn');
1252
  const status = document.getElementById('audio-status');
 
1253
 
1254
  console.log("[Audio] Starting Bi-Directional Stream...");
1255
 
@@ -1266,24 +1304,38 @@ APP_JS = """
1266
  btn.disabled = true;
1267
 
1268
  // --- B. Get Microphone (Input) ---
1269
- const stream = await navigator.mediaDevices.getUserMedia({
 
 
1270
  audio: {
 
1271
  channelCount: 1,
1272
  sampleRate: 16000,
1273
  echoCancellation: true,
1274
  noiseSuppression: true,
1275
  autoGainControl: true
1276
  }
1277
- });
 
 
 
 
 
 
 
1278
 
1279
  status.innerText = "Status: Connecting WS...";
1280
 
1281
  // --- C. Setup WebSocket ---
1282
- // Determine correct protocol (ws:// or wss://)
1283
- const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
1284
- const wsUrl = `${protocol}//${window.location.host}/browser_stream`;
1285
- const ws = new WebSocket(wsUrl);
1286
- ws.binaryType = 'arraybuffer';
 
 
 
 
1287
 
1288
  // --- D. Setup Input Processor (Mic -> WS) ---
1289
  const source = ctx.createMediaStreamSource(stream);
@@ -1302,66 +1354,79 @@ APP_JS = """
1302
  int16Buffer[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
1303
  }
1304
 
1305
- // Send to Server
1306
  ws.send(int16Buffer.buffer);
1307
  };
1308
 
1309
  source.connect(processor);
1310
- processor.connect(ctx.destination); // Essential to keep the processor running
1311
 
1312
  // --- E. Setup Output (WS -> Speaker) ---
1313
- let nextTime = 0;
1314
-
1315
- ws.onopen = () => {
1316
- console.log("[Audio] WebSocket Open");
1317
- status.innerText = "Status: 🟢 Connected (Talk & Listen)";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1318
  btn.innerText = "Microphone Active";
1319
- };
1320
 
1321
- ws.onmessage = (event) => {
1322
- // We received Raw Int16 PCM from Robot
1323
- const int16Data = new Int16Array(event.data);
1324
- const floatBuffer = ctx.createBuffer(1, int16Data.length, 16000);
1325
- const channelData = floatBuffer.getChannelData(0);
1326
 
1327
- let maxVol = 0;
1328
- for (let i = 0; i < int16Data.length; i++) {
1329
- // Convert Int16 -> Float32
1330
- const floatVal = int16Data[i] / 32768.0;
1331
- channelData[i] = floatVal;
1332
- if (Math.abs(floatVal) > maxVol) maxVol = Math.abs(floatVal);
1333
  }
1334
-
1335
- // Log silence occasionally
1336
- if (Math.random() < 0.01) {
1337
- console.log(`[Audio In] Vol: ${maxVol.toFixed(3)}`);
1338
- }
1339
-
1340
- // Play it
1341
- const src = ctx.createBufferSource();
1342
- src.buffer = floatBuffer;
1343
- src.connect(ctx.destination);
1344
-
1345
- const now = ctx.currentTime;
1346
- if (nextTime < now) nextTime = now;
1347
- src.start(nextTime);
1348
- nextTime += floatBuffer.duration;
1349
- };
1350
-
1351
- ws.onerror = (e) => {
1352
- console.error("WS Error", e);
1353
- status.innerText = "Status: WebSocket Error";
1354
- btn.disabled = false;
1355
- };
1356
-
1357
- ws.onclose = () => {
1358
- status.innerText = "Status: Disconnected";
1359
- btn.disabled = false;
1360
- btn.innerText = "▶️ Reconnect";
1361
- // Stop mic tracks
1362
- stream.getTracks().forEach(track => track.stop());
1363
  processor.disconnect();
1364
  source.disconnect();
 
 
 
1365
  };
1366
 
1367
  } catch (err) {
@@ -1370,9 +1435,13 @@ APP_JS = """
1370
  btn.disabled = false;
1371
  }
1372
  };
 
 
 
 
1373
 
1374
  // ==========================================
1375
- // 2. KEYBOARD & GAUGE LOGIC (Keep existing)
1376
  // ==========================================
1377
  const keyMap = {'w':'w','s':'s','a':'a','d':'d','q':'q','e':'e','h':'h','j':'j','l':'l'};
1378
  const btnMap = {'w':'btn-forward','s':'btn-back','a':'btn-left','d':'btn-right','q':'btn-tilt-up','e':'btn-tilt-down','h':'btn-center','j':'btn-body-left','l':'btn-body-right'};
@@ -1423,7 +1492,7 @@ APP_JS = """
1423
  textarea.style.color = isConnected ? '#10b981' : '#ef4444';
1424
  };
1425
  setInterval(updateStatusStyle, 500);
1426
- console.log('🎮 Controls & Bi-Directional Audio Ready');
1427
  }
1428
  """
1429
 
@@ -1491,13 +1560,38 @@ with gr.Blocks(
1491
  trigger=listen_btn.click,
1492
  )
1493
  else:
1494
- # Pure HTML, logic is handled in APP_JS
1495
  audio_player_html = """
1496
  <div style="padding: 10px; background: rgba(0,0,0,0.2); border-radius: 8px;">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1497
  <button id="start-stream-btn" onclick="window.startAudioPlayer()"
1498
  style="background: #7c3aed; color: white; border: none; padding: 8px 16px; border-radius: 4px; cursor: pointer; width: 100%;">
1499
  ▶️ Click to Start Audio Stream
1500
  </button>
 
1501
  <div id="audio-status" style="margin-top: 8px; font-size: 12px; color: #aaa;">Status: Stopped</div>
1502
  </div>
1503
  """
@@ -1607,7 +1701,7 @@ with gr.Blocks(
1607
  gr.HTML(GAUGES_HTML)
1608
 
1609
  # Load keyboard handler
1610
- demo.load(None, None, None, js=APP_JS)
1611
 
1612
  # -------------------------------------------------------------------
1613
  # 10. Mount & run
@@ -1620,3 +1714,4 @@ if __name__ == "__main__":
1620
  print("ℹ️ Point your Robot/Sim to: ws://<YOUR_PC_IP>:7860/robot")
1621
  uvicorn.run(app, host="0.0.0.0", port=7860, proxy_headers=True, forwarded_allow_ips="*", log_level="warning")
1622
 
 
 
1245
  APP_JS = """
1246
  () => {
1247
  // ==========================================
1248
+ // 1. BI-DIRECTIONAL AUDIO WITH MIC SELECTION
1249
  // ==========================================
1250
+
1251
+ // Global handles to manage hot-swapping
1252
+ window.currentStream = null;
1253
+ window.wsHandle = null;
1254
+
1255
+ // --- Helper: Populate Mic List ---
1256
+ window.refreshMicList = async function() {
1257
+ const select = document.getElementById('mic-select');
1258
+ try {
1259
+ const devices = await navigator.mediaDevices.enumerateDevices();
1260
+ const audioInputs = devices.filter(device => device.kind === 'audioinput');
1261
+
1262
+ const currentVal = select.value;
1263
+ select.innerHTML = ''; // Clear existing
1264
+
1265
+ // Add Default option
1266
+ const defaultOpt = document.createElement('option');
1267
+ defaultOpt.value = "";
1268
+ defaultOpt.text = "Default Microphone";
1269
+ select.appendChild(defaultOpt);
1270
+
1271
+ audioInputs.forEach(device => {
1272
+ const option = document.createElement('option');
1273
+ option.value = device.deviceId;
1274
+ // If label is empty, permission isn't granted yet
1275
+ option.text = device.label || `Microphone ${device.deviceId.slice(0,5)}...`;
1276
+ select.appendChild(option);
1277
+ });
1278
+
1279
+ // Restore selection if it still exists
1280
+ if (currentVal) select.value = currentVal;
1281
+
1282
+ } catch (e) {
1283
+ console.error("Error listing devices", e);
1284
+ }
1285
+ };
1286
+
1287
  window.startAudioPlayer = async function() {
1288
  const btn = document.getElementById('start-stream-btn');
1289
  const status = document.getElementById('audio-status');
1290
+ const micSelect = document.getElementById('mic-select');
1291
 
1292
  console.log("[Audio] Starting Bi-Directional Stream...");
1293
 
 
1304
  btn.disabled = true;
1305
 
1306
  // --- B. Get Microphone (Input) ---
1307
+ // Check dropdown for specific device ID
1308
+ const selectedMicId = micSelect.value;
1309
+ const constraints = {
1310
  audio: {
1311
+ deviceId: selectedMicId ? { exact: selectedMicId } : undefined,
1312
  channelCount: 1,
1313
  sampleRate: 16000,
1314
  echoCancellation: true,
1315
  noiseSuppression: true,
1316
  autoGainControl: true
1317
  }
1318
+ };
1319
+
1320
+ const stream = await navigator.mediaDevices.getUserMedia(constraints);
1321
+ window.currentStream = stream; // Save global ref
1322
+
1323
+ // **Refresh list now that we have permission (to show labels)**
1324
+ await window.refreshMicList();
1325
+ if (selectedMicId) micSelect.value = selectedMicId;
1326
 
1327
  status.innerText = "Status: Connecting WS...";
1328
 
1329
  // --- C. Setup WebSocket ---
1330
+ // If we are restarting, reuse WS if open, or create new
1331
+ let ws = window.wsHandle;
1332
+ if (!ws || ws.readyState !== WebSocket.OPEN) {
1333
+ const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
1334
+ const wsUrl = `${protocol}//${window.location.host}/browser_stream`;
1335
+ ws = new WebSocket(wsUrl);
1336
+ ws.binaryType = 'arraybuffer';
1337
+ window.wsHandle = ws;
1338
+ }
1339
 
1340
  // --- D. Setup Input Processor (Mic -> WS) ---
1341
  const source = ctx.createMediaStreamSource(stream);
 
1354
  int16Buffer[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
1355
  }
1356
 
 
1357
  ws.send(int16Buffer.buffer);
1358
  };
1359
 
1360
  source.connect(processor);
1361
+ processor.connect(ctx.destination);
1362
 
1363
  // --- E. Setup Output (WS -> Speaker) ---
1364
+ // Only attach listener if it's a new WS connection
1365
+ if (!ws.onmessage) {
1366
+ let nextTime = 0;
1367
+
1368
+ ws.onopen = () => {
1369
+ console.log("[Audio] WebSocket Open");
1370
+ status.innerText = "Status: 🟢 Connected";
1371
+ btn.innerText = "Microphone Active";
1372
+ };
1373
+
1374
+ ws.onmessage = (event) => {
1375
+ const int16Data = new Int16Array(event.data);
1376
+ const floatBuffer = ctx.createBuffer(1, int16Data.length, 16000);
1377
+ const channelData = floatBuffer.getChannelData(0);
1378
+
1379
+ for (let i = 0; i < int16Data.length; i++) {
1380
+ // Apply 5x gain
1381
+ channelData[i] = (int16Data[i] / 32768.0) * 5.0;
1382
+ }
1383
+
1384
+ const src = ctx.createBufferSource();
1385
+ src.buffer = floatBuffer;
1386
+ src.connect(ctx.destination);
1387
+
1388
+ const now = ctx.currentTime;
1389
+ if (nextTime < now) nextTime = now;
1390
+ src.start(nextTime);
1391
+ nextTime += floatBuffer.duration;
1392
+ };
1393
+
1394
+ ws.onerror = (e) => {
1395
+ console.error("WS Error", e);
1396
+ status.innerText = "Status: WebSocket Error";
1397
+ btn.disabled = false;
1398
+ };
1399
+
1400
+ ws.onclose = () => {
1401
+ status.innerText = "Status: Disconnected";
1402
+ btn.disabled = false;
1403
+ btn.innerText = "▶️ Reconnect";
1404
+ if (window.currentStream) {
1405
+ window.currentStream.getTracks().forEach(track => track.stop());
1406
+ }
1407
+ processor.disconnect();
1408
+ source.disconnect();
1409
+ };
1410
+ } else {
1411
+ // If WS was already open, just update UI
1412
+ status.innerText = "Status: 🟢 Connected (Mic Switched)";
1413
  btn.innerText = "Microphone Active";
1414
+ }
1415
 
1416
+ // Handle Mic Switching
1417
+ micSelect.onchange = async () => {
1418
+ console.log("Switching microphone...");
1419
+ status.innerText = "Status: Switching Mic...";
 
1420
 
1421
+ // Stop current mic tracks
1422
+ if (window.currentStream) {
1423
+ window.currentStream.getTracks().forEach(t => t.stop());
 
 
 
1424
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1425
  processor.disconnect();
1426
  source.disconnect();
1427
+
1428
+ // Restart player (will pick up new value from dropdown)
1429
+ await window.startAudioPlayer();
1430
  };
1431
 
1432
  } catch (err) {
 
1435
  btn.disabled = false;
1436
  }
1437
  };
1438
+
1439
+ // Attempt to list mics on load (will likely have empty labels until permission)
1440
+ setTimeout(window.refreshMicList, 1000);
1441
+
1442
 
1443
  // ==========================================
1444
+ // 2. KEYBOARD & GAUGE LOGIC (Unchanged)
1445
  // ==========================================
1446
  const keyMap = {'w':'w','s':'s','a':'a','d':'d','q':'q','e':'e','h':'h','j':'j','l':'l'};
1447
  const btnMap = {'w':'btn-forward','s':'btn-back','a':'btn-left','d':'btn-right','q':'btn-tilt-up','e':'btn-tilt-down','h':'btn-center','j':'btn-body-left','l':'btn-body-right'};
 
1492
  textarea.style.color = isConnected ? '#10b981' : '#ef4444';
1493
  };
1494
  setInterval(updateStatusStyle, 500);
1495
+ console.log('🎮 Controls & Mic Select Ready');
1496
  }
1497
  """
1498
 
 
1560
  trigger=listen_btn.click,
1561
  )
1562
  else:
1563
+ # HTML with Microphone Select Dropdown
1564
  audio_player_html = """
1565
  <div style="padding: 10px; background: rgba(0,0,0,0.2); border-radius: 8px;">
1566
+
1567
+ <div style="margin-bottom: 8px; display: flex; gap: 8px;">
1568
+ <select id="mic-select" style="
1569
+ background: #1f1f2e;
1570
+ color: #ccc;
1571
+ border: 1px solid #3a3a4a;
1572
+ padding: 6px;
1573
+ border-radius: 4px;
1574
+ flex-grow: 1;
1575
+ outline: none;">
1576
+ <option value="">Default Microphone</option>
1577
+ </select>
1578
+
1579
+ <button onclick="window.refreshMicList()" style="
1580
+ background: #2a2a3a;
1581
+ border: 1px solid #3a3a4a;
1582
+ color: #aaa;
1583
+ border-radius: 4px;
1584
+ cursor: pointer;
1585
+ padding: 0 8px;" title="Refresh Device List">
1586
+ 🔄
1587
+ </button>
1588
+ </div>
1589
+
1590
  <button id="start-stream-btn" onclick="window.startAudioPlayer()"
1591
  style="background: #7c3aed; color: white; border: none; padding: 8px 16px; border-radius: 4px; cursor: pointer; width: 100%;">
1592
  ▶️ Click to Start Audio Stream
1593
  </button>
1594
+
1595
  <div id="audio-status" style="margin-top: 8px; font-size: 12px; color: #aaa;">Status: Stopped</div>
1596
  </div>
1597
  """
 
1701
  gr.HTML(GAUGES_HTML)
1702
 
1703
  # Load keyboard handler
1704
+ demo.load(None, None, None, js=KEYBOARD_JS if USE_AUDIO_WEBRTC else APP_JS)
1705
 
1706
  # -------------------------------------------------------------------
1707
  # 10. Mount & run
 
1714
  print("ℹ️ Point your Robot/Sim to: ws://<YOUR_PC_IP>:7860/robot")
1715
  uvicorn.run(app, host="0.0.0.0", port=7860, proxy_headers=True, forwarded_allow_ips="*", log_level="warning")
1716
 
1717
+