Upload 7 files
- audio_processor.js +36 -0
- encoder_sleeker_fp32.onnx +3 -0
- encoder_worker.js +214 -0
- index.html +426 -18
- streaming_asr.js +801 -0
- ten_vad.js +30 -0
- ten_vad.wasm +3 -0
audio_processor.js
ADDED
@@ -0,0 +1,36 @@
/**
 * AudioWorklet Processor for low-latency audio capture
 * Runs in a separate audio thread for minimal latency
 */

class AudioProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.bufferSize = 512; // Send audio every ~32ms at 16kHz
    this.buffer = [];
  }

  process(inputs, outputs, parameters) {
    const input = inputs[0];
    if (input.length > 0) {
      const channelData = input[0];

      // Accumulate samples
      for (let i = 0; i < channelData.length; i++) {
        this.buffer.push(channelData[i]);
      }

      // Send when we have enough
      while (this.buffer.length >= this.bufferSize) {
        const chunk = this.buffer.splice(0, this.bufferSize);
        this.port.postMessage({
          audio: new Float32Array(chunk)
        });
      }
    }

    return true; // Keep processor alive
  }
}

registerProcessor('audio-processor', AudioProcessor);
encoder_sleeker_fp32.onnx
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3639b95297009937838ae8fa9193602c7407e60a687b1591ffd2cc4b616d87d0
size 31139657
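
This is a Git LFS pointer; the ~31 MB encoder weights themselves are fetched through LFS. The filename matches the pattern encoder_worker.js builds at init time, illustrated below; note that the demo's default "ONNX Files URL" in index.html is "./models", so the worker looks for the model under that prefix unless the field is changed:

// Illustrative only: how encoder_worker.js composes the encoder path in its 'init' handler.
const onnxUrl = './models';   // default value of the "ONNX Files URL" input in index.html
const modelName = 'sleeker';  // or 'spindlier'
const dtype = 'fp32';
console.log(`${onnxUrl}/encoder_${modelName}_${dtype}.onnx`); // → ./models/encoder_sleeker_fp32.onnx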
encoder_worker.js
ADDED
@@ -0,0 +1,214 @@
/**
 * Encoder Worker - Runs preprocessor + encoder in a separate thread
 */

importScripts('https://cdn.jsdelivr.net/npm/[email protected]/dist/ort.min.js');

// Configure ONNX Runtime to find WASM files from CDN
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/[email protected]/dist/';

// Model config
let cfg = null;
let preprocessor = null;
let encoder = null;
let tailLatency = 0;

// Preprocessor state
let prepSession = null;
let prepDim = 0;
let prepC1 = 0;
let prepStateC1 = null;
let prepStateC2 = null;

// Encoder state
let encSession = null;
let encDim = 0;
let encNPast = 0;
let encNFuture = 0;
let encEncoderDepth = 0;
let encContextSize = 0;
let encInputBuffer = [];
let encTotalInputFrames = 0;
let encTotalOutputFrames = 0;

function resetPreprocessor() {
  if (prepStateC1) prepStateC1.fill(0);
  if (prepStateC2) prepStateC2.fill(0);
}

function resetEncoder() {
  encInputBuffer = [];
  encTotalInputFrames = 0;
  encTotalOutputFrames = 0;
}

async function processPreprocessor(audioChunk) {
  const feeds = {
    'audio_chunk': new ort.Tensor('float32', audioChunk, [1, audioChunk.length]),
    'state_c1': new ort.Tensor('float32', prepStateC1, [1, 4, prepDim]),
    'state_c2': new ort.Tensor('float32', prepStateC2, [1, 4, prepC1])
  };

  const results = await prepSession.run(feeds);

  // Update states
  prepStateC1.set(results.new_state_c1.data);
  prepStateC2.set(results.new_state_c2.data);

  return {
    data: results.features.data,
    dims: results.features.dims
  };
}

async function processEncoder(melData, melDims, flush = true) {
  const newFrames = melDims[1];

  // Append new frames to buffer
  for (let f = 0; f < newFrames; f++) {
    const frame = new Float32Array(encDim);
    for (let d = 0; d < encDim; d++) {
      frame[d] = melData[f * encDim + d];
    }
    encInputBuffer.push(frame);
  }

  encTotalInputFrames += newFrames;

  // Calculate output range
  const canOutput = flush
    ? encTotalInputFrames
    : Math.max(0, encTotalInputFrames - tailLatency);

  const outputFrom = flush
    ? Math.max(0, encTotalOutputFrames - tailLatency)
    : encTotalOutputFrames;

  const newOutputCount = canOutput - outputFrom;

  if (newOutputCount <= 0) {
    return { data: new Float32Array(0), dims: [1, 0, encDim] };
  }

  // Prepare input buffer tensor
  const bufferFrames = encInputBuffer.length;
  const bufferData = new Float32Array(bufferFrames * encDim);
  for (let f = 0; f < bufferFrames; f++) {
    bufferData.set(encInputBuffer[f], f * encDim);
  }

  const feeds = {
    'input': new ort.Tensor('float32', bufferData, [1, bufferFrames, encDim])
  };

  const results = await encSession.run(feeds);
  const fullOutput = results.output;

  // Calculate which frames to return
  const bufStartFrame = encTotalInputFrames - bufferFrames;
  const outputStart = outputFrom - bufStartFrame;

  // Extract the subset of output
  const resultData = new Float32Array(newOutputCount * encDim);
  for (let f = 0; f < newOutputCount; f++) {
    for (let d = 0; d < encDim; d++) {
      resultData[f * encDim + d] = fullOutput.data[(outputStart + f) * encDim + d];
    }
  }

  // Trim input buffer to context size
  if (encInputBuffer.length > encContextSize) {
    encInputBuffer = encInputBuffer.slice(-encContextSize);
  }

  encTotalOutputFrames = canOutput;
  return { data: resultData, dims: [1, newOutputCount, encDim] };
}

self.onmessage = async function(e) {
  const { type, data } = e.data;

  switch (type) {
    case 'init': {
      try {
        cfg = data.cfg;
        const onnxUrl = data.onnxUrl;
        const modelName = data.modelName;
        const dtype = 'fp32';

        tailLatency = cfg.n_future * cfg.encoder_depth;

        // Initialize preprocessor
        self.postMessage({ type: 'status', message: 'Loading preprocessor...' });
        prepSession = await ort.InferenceSession.create(
          `${onnxUrl}/preprocessor_streaming_${modelName}_${dtype}.onnx`
        );
        prepDim = cfg.dim;
        prepC1 = 2 * cfg.dim;
        prepStateC1 = new Float32Array(4 * cfg.dim);
        prepStateC2 = new Float32Array(4 * prepC1);

        // Initialize encoder
        self.postMessage({ type: 'status', message: 'Loading encoder...' });
        encSession = await ort.InferenceSession.create(
          `${onnxUrl}/encoder_${modelName}_${dtype}.onnx`
        );
        encDim = cfg.dim;
        encNPast = cfg.n_past;
        encNFuture = cfg.n_future;
        encEncoderDepth = cfg.encoder_depth;
        encContextSize = cfg.encoder_depth * (cfg.n_past + cfg.n_future);

        self.postMessage({ type: 'ready' });
      } catch (err) {
        self.postMessage({ type: 'error', message: err.message });
      }
      break;
    }

    case 'segment_start': {
      resetPreprocessor();
      resetEncoder();
      self.postMessage({
        type: 'segment_start',
        segmentId: data.segmentId
      });
      break;
    }

    case 'segment_end': {
      self.postMessage({
        type: 'segment_end',
        segmentId: data.segmentId
      });
      break;
    }

    case 'audio': {
      try {
        // Process through preprocessor
        const mel = await processPreprocessor(new Float32Array(data.audio));

        const audioMs = (data.audio.length / 16000 * 1000).toFixed(0);
        console.log(`Audio ${data.audio.length} samples (${audioMs}ms) → Mel ${mel.dims[1]} frames`);

        // Process through encoder with flush=true
        const enc = await processEncoder(mel.data, mel.dims, true);

        console.log(`Mel ${mel.dims[1]} frames → Encoder ${enc.dims[1]} frames (accumulated: ${encTotalOutputFrames})`);

        if (enc.dims[1] > 0) {
          self.postMessage({
            type: 'features',
            segmentId: data.segmentId,
            features: enc.data,
            dims: enc.dims
          }, [enc.data.buffer]); // Transfer ownership
        }
      } catch (err) {
        console.error('Encoder error:', err);
      }
      break;
    }
  }
};
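
streaming_asr.js (further below) also spawns a decoder_worker.js, which is not part of this upload. A hypothetical skeleton of the message interface that worker would need, inferred only from the messages the main thread sends it and expects back; everything beyond those message shapes (the actual adapter/decoder inference, any ONNX session names) is illustrative:

// Hypothetical decoder_worker.js skeleton — only the message protocol is taken from this upload.
// importScripts the same onnxruntime-web build used by encoder_worker.js before running real inference.
let cfg = null;

self.onmessage = async function (e) {
  const { type, data } = e.data;
  switch (type) {
    case 'init':
      cfg = data.cfg; // also receives data.onnxUrl and data.modelName
      // ...load adapter/decoder ONNX sessions here...
      self.postMessage({ type: 'ready' });
      break;
    case 'segment_start':
      // reset decoding state for data.segmentId
      break;
    case 'features':
      // data.features is a transferred Float32Array with shape data.dims = [1, frames, dim]
      // ...run incremental decoding, then stream a partial result:
      self.postMessage({ type: 'live_caption', text: '...partial text...' });
      break;
    case 'segment_end':
      // ...finalize decoding, then emit the final transcript for the segment:
      self.postMessage({ type: 'transcript', segmentId: data.segmentId, text: '...final text...' });
      break;
  }
};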
index.html
CHANGED
@@ -1,19 +1,427 @@
Removed (old lines 1-18): "<!", "<html>", and sixteen blank lines; old line 19 "</html>" is kept as new line 427.
Added (new lines 1-426):
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Streaming ASR Demo - Moonshine</title>
  <style>
    * {
      box-sizing: border-box;
      margin: 0;
      padding: 0;
    }

    body {
      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
      background: #1a1a2e;
      color: #eee;
      min-height: 100vh;
      padding: 20px;
    }

    .container {
      max-width: 900px;
      margin: 0 auto;
    }

    h1 {
      text-align: center;
      margin-bottom: 20px;
      color: #00d4ff;
    }

    .status-bar {
      display: flex;
      justify-content: space-between;
      align-items: center;
      background: #16213e;
      padding: 15px 20px;
      border-radius: 10px;
      margin-bottom: 20px;
    }

    .status-indicator {
      display: flex;
      align-items: center;
      gap: 10px;
    }

    .status-dot {
      width: 12px;
      height: 12px;
      border-radius: 50%;
      background: #666;
    }

    .status-dot.idle { background: #666; }
    .status-dot.listening { background: #00ff88; animation: pulse 1s infinite; }
    .status-dot.recording { background: #ff4444; animation: pulse 0.5s infinite; }

    @keyframes pulse {
      0%, 100% { opacity: 1; }
      50% { opacity: 0.5; }
    }

    .controls {
      display: flex;
      gap: 10px;
    }

    button {
      padding: 10px 20px;
      border: none;
      border-radius: 5px;
      cursor: pointer;
      font-size: 14px;
      transition: all 0.2s;
    }

    button:disabled {
      opacity: 0.5;
      cursor: not-allowed;
    }

    .btn-primary {
      background: #00d4ff;
      color: #1a1a2e;
    }

    .btn-primary:hover:not(:disabled) {
      background: #00a8cc;
    }

    .btn-danger {
      background: #ff4444;
      color: white;
    }

    .btn-danger:hover:not(:disabled) {
      background: #cc3333;
    }

    .vad-section {
      background: #16213e;
      padding: 20px;
      border-radius: 10px;
      margin-bottom: 20px;
    }

    .vad-section h3 {
      margin-bottom: 15px;
      color: #00d4ff;
    }

    .vad-graph {
      background: #0f0f23;
      border-radius: 5px;
      padding: 10px;
      height: 120px;
      position: relative;
      overflow: hidden;
    }

    .vad-canvas {
      width: 100%;
      height: 100%;
    }

    .vad-bar {
      display: flex;
      align-items: center;
      gap: 10px;
      margin-top: 15px;
    }

    .vad-bar-container {
      flex: 1;
      height: 20px;
      background: #0f0f23;
      border-radius: 10px;
      overflow: hidden;
    }

    .vad-bar-fill {
      height: 100%;
      background: linear-gradient(90deg, #00ff88, #00d4ff);
      width: 0%;
      transition: width 0.1s;
    }

    .vad-value {
      min-width: 50px;
      text-align: right;
      font-family: monospace;
    }

    .pipeline-status {
      display: flex;
      gap: 15px;
      margin-top: 15px;
      font-family: monospace;
      font-size: 12px;
      color: #888;
    }

    .transcripts-section {
      background: #16213e;
      padding: 20px;
      border-radius: 10px;
      min-height: 300px;
    }

    .transcripts-section h3 {
      margin-bottom: 15px;
      color: #00d4ff;
    }

    .transcripts-list {
      max-height: 200px;
      overflow-y: auto;
    }

    .transcript-item {
      padding: 10px 15px;
      background: #0f0f23;
      border-radius: 5px;
      margin-bottom: 10px;
      display: flex;
      gap: 10px;
    }

    .transcript-duration {
      color: #00d4ff;
      font-family: monospace;
      min-width: 50px;
    }

    .transcript-text {
      flex: 1;
    }

    .live-caption {
      padding: 30px 40px;
      background: rgba(0, 0, 0, 0.75);
      border-radius: 12px;
      margin-top: 25px;
      min-height: 100px;
      display: flex;
      flex-direction: column;
      justify-content: center;
      align-items: center;
      text-align: center;
      backdrop-filter: blur(10px);
      border: 1px solid rgba(255, 255, 255, 0.1);
      box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
      transition: all 0.3s ease;
    }

    .live-caption.active {
      background: rgba(0, 0, 0, 0.85);
      border-color: rgba(255, 68, 68, 0.3);
      box-shadow: 0 8px 32px rgba(255, 68, 68, 0.15);
    }

    .live-caption-label {
      font-size: 11px;
      color: rgba(255, 255, 255, 0.5);
      text-transform: uppercase;
      letter-spacing: 2px;
      margin-bottom: 12px;
    }

    .live-caption.active .live-caption-label {
      color: #ff6b6b;
    }

    .live-caption-text {
      font-size: 28px;
      font-weight: 400;
      line-height: 1.5;
      min-height: 40px;
      color: #ffffff;
      text-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
      max-width: 100%;
      word-wrap: break-word;
    }

    .live-caption-text.placeholder {
      color: rgba(255, 255, 255, 0.35);
      font-style: italic;
      font-size: 20px;
      font-weight: 300;
    }

    .config-section {
      background: #16213e;
      padding: 20px;
      border-radius: 10px;
      margin-bottom: 20px;
    }

    .config-section h3 {
      margin-bottom: 15px;
      color: #00d4ff;
    }

    .config-grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
      gap: 15px;
    }

    .config-item {
      display: flex;
      flex-direction: column;
      gap: 5px;
    }

    .config-item label {
      font-size: 12px;
      color: #888;
    }

    .config-item select, .config-item input {
      padding: 8px;
      border: 1px solid #333;
      border-radius: 5px;
      background: #0f0f23;
      color: #eee;
    }

    .loading-overlay {
      position: fixed;
      top: 0;
      left: 0;
      right: 0;
      bottom: 0;
      background: rgba(0, 0, 0, 0.8);
      display: flex;
      justify-content: center;
      align-items: center;
      z-index: 1000;
    }

    .loading-overlay.hidden {
      display: none;
    }

    .loading-content {
      text-align: center;
    }

    .loading-spinner {
      width: 50px;
      height: 50px;
      border: 3px solid #333;
      border-top-color: #00d4ff;
      border-radius: 50%;
      animation: spin 1s linear infinite;
      margin: 0 auto 20px;
    }

    @keyframes spin {
      to { transform: rotate(360deg); }
    }

    .loading-text {
      color: #00d4ff;
    }

    .error-message {
      background: #ff4444;
      color: white;
      padding: 15px;
      border-radius: 5px;
      margin-bottom: 20px;
      display: none;
    }

    .error-message.visible {
      display: block;
    }
  </style>
</head>
<body>
  <div class="loading-overlay hidden" id="loadingOverlay">
    <div class="loading-content">
      <div class="loading-spinner"></div>
      <div class="loading-text" id="loadingText">Loading models...</div>
    </div>
  </div>

  <div class="container">
    <h1>Streaming ASR Demo</h1>

    <div class="error-message" id="errorMessage"></div>

    <div class="config-section">
      <h3>Configuration</h3>
      <div class="config-grid">
        <div class="config-item">
          <label>Model</label>
          <select id="modelSelect">
            <option value="sleeker">Moonshine Sleeker</option>
            <option value="spindlier">Moonshine Spindlier</option>
          </select>
        </div>
        <div class="config-item">
          <label>ONNX Files URL</label>
          <input type="text" id="onnxUrl" placeholder="e.g., ./models or https://..." value="./models">
        </div>
        <div class="config-item">
          <label>Onset Threshold</label>
          <input type="number" id="onsetThreshold" value="0.4" min="0" max="1" step="0.1">
        </div>
        <div class="config-item">
          <label>Offset Threshold</label>
          <input type="number" id="offsetThreshold" value="0.3" min="0" max="1" step="0.1">
        </div>
      </div>
    </div>

    <div class="status-bar">
      <div class="status-indicator">
        <div class="status-dot" id="statusDot"></div>
        <span id="statusText">Ready</span>
      </div>
      <div class="controls">
        <button class="btn-primary" id="startBtn">Start Listening</button>
        <button class="btn-danger" id="stopBtn" disabled>Stop</button>
      </div>
    </div>

    <div class="vad-section">
      <h3>Voice Activity Detection</h3>
      <div class="vad-graph">
        <canvas id="vadCanvas" class="vad-canvas"></canvas>
      </div>
      <div class="vad-bar">
        <span>VAD:</span>
        <div class="vad-bar-container">
          <div class="vad-bar-fill" id="vadBarFill"></div>
        </div>
        <span class="vad-value" id="vadValue">0%</span>
      </div>
      <div class="pipeline-status">
        <span>audio_q: <span id="audioQueueSize">0</span></span>
        <span>features_q: <span id="featuresQueueSize">0</span></span>
        <span>dropped: <span id="droppedChunks">0</span></span>
      </div>
    </div>

    <div class="transcripts-section">
      <h3>Transcripts</h3>
      <div class="transcripts-list" id="transcriptsList"></div>
      <div class="live-caption" id="liveCaption">
        <div class="live-caption-label">Live Caption</div>
        <div class="live-caption-text placeholder" id="liveCaptionText">
          Waiting for speech...
        </div>
      </div>
    </div>
  </div>

  <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/ort.min.js"></script>
  <script type="module" src="streaming_asr.js"></script>
</body>
</html>
streaming_asr.js
ADDED
|
@@ -0,0 +1,801 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Streaming ASR Demo - JavaScript Implementation with Web Workers
|
| 3 |
+
*
|
| 4 |
+
* Architecture:
|
| 5 |
+
* - Main thread: Audio capture, VAD, UI updates
|
| 6 |
+
* - Encoder Worker: Preprocessor + Encoder ONNX inference
|
| 7 |
+
* - Decoder Worker: Adapter + Decoder ONNX inference
|
| 8 |
+
*/
|
| 9 |
+
|
| 10 |
+
// =============================================================================
|
| 11 |
+
// Constants
|
| 12 |
+
// =============================================================================
|
| 13 |
+
|
| 14 |
+
const SAMPLE_RATE = 16000;
|
| 15 |
+
const VAD_CHUNK_SAMPLES = 160; // 10ms - optimal for TenVAD
|
| 16 |
+
const ASR_CHUNK_SAMPLES = 320; // 20ms - Moonshine frame size
|
| 17 |
+
const ENCODER_BATCH_SAMPLES = 5120; // 320ms - batch size for encoder
|
| 18 |
+
|
| 19 |
+
const PRE_BUFFER_CHUNKS = 15; // ~300ms at 20ms chunks
|
| 20 |
+
const POST_BUFFER_CHUNKS = 3; // ~60ms at 20ms chunks
|
| 21 |
+
|
| 22 |
+
const MODEL_CONFIGS = {
|
| 23 |
+
sleeker: {
|
| 24 |
+
dim: 336,
|
| 25 |
+
dec_dim: 288,
|
| 26 |
+
depth: 6,
|
| 27 |
+
encoder_depth: 6,
|
| 28 |
+
n_past: 16,
|
| 29 |
+
n_future: 4,
|
| 30 |
+
nheads: 8,
|
| 31 |
+
head_dim: 36,
|
| 32 |
+
vocab_size: 32768
|
| 33 |
+
},
|
| 34 |
+
spindlier: {
|
| 35 |
+
dim: 620,
|
| 36 |
+
dec_dim: 512,
|
| 37 |
+
depth: 10,
|
| 38 |
+
encoder_depth: 10,
|
| 39 |
+
n_past: 16,
|
| 40 |
+
n_future: 4,
|
| 41 |
+
nheads: 8,
|
| 42 |
+
head_dim: 64,
|
| 43 |
+
vocab_size: 32768
|
| 44 |
+
}
|
| 45 |
+
};
|
| 46 |
+
|
| 47 |
+
// =============================================================================
|
| 48 |
+
// TenVAD - WebAssembly-based Voice Activity Detection
|
| 49 |
+
// =============================================================================
|
| 50 |
+
|
| 51 |
+
class TenVAD {
|
| 52 |
+
constructor(hopSize = 160, threshold = 0.5) {
|
| 53 |
+
this.hopSize = hopSize;
|
| 54 |
+
this.threshold = threshold;
|
| 55 |
+
this.module = null;
|
| 56 |
+
this.vadHandle = null;
|
| 57 |
+
this.audioPtr = null;
|
| 58 |
+
this.probPtr = null;
|
| 59 |
+
this.flagPtr = null;
|
| 60 |
+
this.ready = false;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
async init(wasmUrl = './ten_vad.js') {
|
| 64 |
+
const wasmBinaryUrl = wasmUrl.replace('.js', '.wasm');
|
| 65 |
+
|
| 66 |
+
// Dynamic import of the ES module
|
| 67 |
+
const vadModule = await import(wasmUrl);
|
| 68 |
+
const createTenVadModule = vadModule.default;
|
| 69 |
+
|
| 70 |
+
this.module = await createTenVadModule({
|
| 71 |
+
locateFile: (path) => {
|
| 72 |
+
if (path.endsWith('.wasm')) {
|
| 73 |
+
return wasmBinaryUrl;
|
| 74 |
+
}
|
| 75 |
+
return path;
|
| 76 |
+
}
|
| 77 |
+
});
|
| 78 |
+
|
| 79 |
+
// Create VAD instance
|
| 80 |
+
const vadHandlePtr = this.module._malloc(4);
|
| 81 |
+
const result = this.module._ten_vad_create(vadHandlePtr, this.hopSize, this.threshold);
|
| 82 |
+
|
| 83 |
+
if (result !== 0) {
|
| 84 |
+
this.module._free(vadHandlePtr);
|
| 85 |
+
throw new Error(`Failed to create TenVAD instance: ${result}`);
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
this.vadHandle = this.module.HEAP32[vadHandlePtr / 4];
|
| 89 |
+
this.module._free(vadHandlePtr);
|
| 90 |
+
|
| 91 |
+
// Allocate buffers
|
| 92 |
+
this.audioPtr = this.module._malloc(this.hopSize * 2);
|
| 93 |
+
this.probPtr = this.module._malloc(4);
|
| 94 |
+
this.flagPtr = this.module._malloc(4);
|
| 95 |
+
|
| 96 |
+
this.ready = true;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
process(audioChunkFloat32) {
|
| 100 |
+
if (!this.ready) return -1;
|
| 101 |
+
|
| 102 |
+
const int16Data = new Int16Array(this.hopSize);
|
| 103 |
+
for (let i = 0; i < this.hopSize && i < audioChunkFloat32.length; i++) {
|
| 104 |
+
int16Data[i] = Math.max(-32768, Math.min(32767, Math.round(audioChunkFloat32[i] * 32767)));
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
this.module.HEAP16.set(int16Data, this.audioPtr / 2);
|
| 108 |
+
this.module._ten_vad_process(this.vadHandle, this.audioPtr, this.hopSize, this.probPtr, this.flagPtr);
|
| 109 |
+
|
| 110 |
+
return this.module.HEAPF32[this.probPtr / 4];
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
destroy() {
|
| 114 |
+
if (!this.ready || !this.module) return;
|
| 115 |
+
|
| 116 |
+
this.ready = false; // Prevent further use
|
| 117 |
+
|
| 118 |
+
try {
|
| 119 |
+
if (this.audioPtr) {
|
| 120 |
+
this.module._free(this.audioPtr);
|
| 121 |
+
this.audioPtr = null;
|
| 122 |
+
}
|
| 123 |
+
if (this.probPtr) {
|
| 124 |
+
this.module._free(this.probPtr);
|
| 125 |
+
this.probPtr = null;
|
| 126 |
+
}
|
| 127 |
+
if (this.flagPtr) {
|
| 128 |
+
this.module._free(this.flagPtr);
|
| 129 |
+
this.flagPtr = null;
|
| 130 |
+
}
|
| 131 |
+
// Skip _ten_vad_destroy as it causes memory access errors
|
| 132 |
+
// The WASM memory will be cleaned up when the module is garbage collected
|
| 133 |
+
this.vadHandle = null;
|
| 134 |
+
} catch (e) {
|
| 135 |
+
console.warn('TenVAD cleanup error:', e);
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
this.module = null;
|
| 139 |
+
}
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
// Fallback simple energy-based VAD
|
| 143 |
+
class SimpleVAD {
|
| 144 |
+
constructor(sampleRate = 16000, frameSize = 160) {
|
| 145 |
+
this.frameSize = frameSize;
|
| 146 |
+
this.energyHistory = [];
|
| 147 |
+
this.historySize = 50;
|
| 148 |
+
this.noiseFloor = 0.001;
|
| 149 |
+
this.ready = true;
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
async init() {}
|
| 153 |
+
|
| 154 |
+
process(audioChunk) {
|
| 155 |
+
let energy = 0;
|
| 156 |
+
for (let i = 0; i < audioChunk.length; i++) {
|
| 157 |
+
energy += audioChunk[i] * audioChunk[i];
|
| 158 |
+
}
|
| 159 |
+
energy = Math.sqrt(energy / audioChunk.length);
|
| 160 |
+
|
| 161 |
+
this.energyHistory.push(energy);
|
| 162 |
+
if (this.energyHistory.length > this.historySize) {
|
| 163 |
+
this.energyHistory.shift();
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
if (this.energyHistory.length > 10) {
|
| 167 |
+
const sorted = [...this.energyHistory].sort((a, b) => a - b);
|
| 168 |
+
this.noiseFloor = sorted[Math.floor(sorted.length * 0.1)] || 0.001;
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
const snr = energy / (this.noiseFloor + 1e-10);
|
| 172 |
+
return 1 / (1 + Math.exp(-2 * (snr - 3)));
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
destroy() {}
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
// =============================================================================
|
| 179 |
+
// Pipelined Streaming ASR with Web Workers
|
| 180 |
+
// =============================================================================
|
| 181 |
+
|
| 182 |
+
class PipelinedStreamingASR {
|
| 183 |
+
constructor(config) {
|
| 184 |
+
this.modelName = config.modelName || 'sleeker';
|
| 185 |
+
this.onnxUrl = config.onnxUrl || './models';
|
| 186 |
+
this.onsetThreshold = config.onsetThreshold || 0.5;
|
| 187 |
+
this.offsetThreshold = config.offsetThreshold || 0.3;
|
| 188 |
+
this.emaAlpha = config.emaAlpha || 0.3;
|
| 189 |
+
|
| 190 |
+
this.cfg = MODEL_CONFIGS[this.modelName];
|
| 191 |
+
|
| 192 |
+
// Workers
|
| 193 |
+
this.encoderWorker = null;
|
| 194 |
+
this.decoderWorker = null;
|
| 195 |
+
this.encoderReady = false;
|
| 196 |
+
this.decoderReady = false;
|
| 197 |
+
|
| 198 |
+
// VAD (runs on main thread for low latency)
|
| 199 |
+
this.vad = null;
|
| 200 |
+
|
| 201 |
+
// Audio capture
|
| 202 |
+
this.audioContext = null;
|
| 203 |
+
this.sourceNode = null;
|
| 204 |
+
this.workletNode = null;
|
| 205 |
+
|
| 206 |
+
// State
|
| 207 |
+
this.running = false;
|
| 208 |
+
this.state = 'idle';
|
| 209 |
+
this.currentSegmentId = 0;
|
| 210 |
+
this.emaProb = 0;
|
| 211 |
+
this.onsetCounter = 0;
|
| 212 |
+
this.offsetCounter = 0;
|
| 213 |
+
|
| 214 |
+
// Buffers
|
| 215 |
+
this.vadBuffer = [];
|
| 216 |
+
this.asrBuffer = [];
|
| 217 |
+
this.preBuffer = [];
|
| 218 |
+
this.postBufferRemaining = 0;
|
| 219 |
+
this.encoderBatchBuffer = []; // Accumulate 320ms before sending to encoder
|
| 220 |
+
|
| 221 |
+
// Display state
|
| 222 |
+
this.vadHistory = [];
|
| 223 |
+
this.vadUpdateCounter = 0;
|
| 224 |
+
this.vadUpdateInterval = 5; // Update display every 5 VAD chunks (50ms)
|
| 225 |
+
|
| 226 |
+
// Callbacks
|
| 227 |
+
this.onVadUpdate = null;
|
| 228 |
+
this.onTranscript = null;
|
| 229 |
+
this.onLiveCaption = null;
|
| 230 |
+
this.onStatusUpdate = null;
|
| 231 |
+
this.onQueueUpdate = null;
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
async loadModels(progressCallback) {
|
| 235 |
+
// Initialize VAD
|
| 236 |
+
try {
|
| 237 |
+
progressCallback?.('Loading TenVAD...');
|
| 238 |
+
this.vad = new TenVAD(VAD_CHUNK_SAMPLES, 0.5);
|
| 239 |
+
await this.vad.init('./ten_vad.js');
|
| 240 |
+
console.log('Using TenVAD');
|
| 241 |
+
} catch (e) {
|
| 242 |
+
console.warn('TenVAD failed, using SimpleVAD:', e.message);
|
| 243 |
+
this.vad = new SimpleVAD(SAMPLE_RATE, VAD_CHUNK_SAMPLES);
|
| 244 |
+
await this.vad.init();
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
// Initialize Encoder Worker
|
| 248 |
+
progressCallback?.('Loading encoder...');
|
| 249 |
+
await this.initEncoderWorker();
|
| 250 |
+
|
| 251 |
+
// Initialize Decoder Worker
|
| 252 |
+
progressCallback?.('Loading decoder...');
|
| 253 |
+
await this.initDecoderWorker();
|
| 254 |
+
|
| 255 |
+
progressCallback?.('Ready!');
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
initEncoderWorker() {
|
| 259 |
+
return new Promise((resolve, reject) => {
|
| 260 |
+
this.encoderWorker = new Worker('./encoder_worker.js');
|
| 261 |
+
|
| 262 |
+
this.encoderWorker.onmessage = (e) => {
|
| 263 |
+
const { type, data } = e.data;
|
| 264 |
+
|
| 265 |
+
switch (type) {
|
| 266 |
+
case 'ready':
|
| 267 |
+
this.encoderReady = true;
|
| 268 |
+
resolve();
|
| 269 |
+
break;
|
| 270 |
+
case 'error':
|
| 271 |
+
reject(new Error(e.data.message));
|
| 272 |
+
break;
|
| 273 |
+
case 'status':
|
| 274 |
+
// Progress update from worker
|
| 275 |
+
break;
|
| 276 |
+
case 'segment_start':
|
| 277 |
+
this.decoderWorker?.postMessage({ type: 'segment_start', data: { segmentId: e.data.segmentId } });
|
| 278 |
+
break;
|
| 279 |
+
case 'segment_end':
|
| 280 |
+
this.decoderWorker?.postMessage({ type: 'segment_end', data: { segmentId: e.data.segmentId } });
|
| 281 |
+
break;
|
| 282 |
+
case 'features':
|
| 283 |
+
// Forward features to decoder worker
|
| 284 |
+
this.decoderWorker?.postMessage({
|
| 285 |
+
type: 'features',
|
| 286 |
+
data: {
|
| 287 |
+
segmentId: e.data.segmentId,
|
| 288 |
+
features: e.data.features,
|
| 289 |
+
dims: e.data.dims
|
| 290 |
+
}
|
| 291 |
+
}, [e.data.features.buffer]);
|
| 292 |
+
break;
|
| 293 |
+
}
|
| 294 |
+
};
|
| 295 |
+
|
| 296 |
+
this.encoderWorker.postMessage({
|
| 297 |
+
type: 'init',
|
| 298 |
+
data: {
|
| 299 |
+
cfg: this.cfg,
|
| 300 |
+
onnxUrl: this.onnxUrl,
|
| 301 |
+
modelName: this.modelName
|
| 302 |
+
}
|
| 303 |
+
});
|
| 304 |
+
});
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
initDecoderWorker() {
|
| 308 |
+
return new Promise((resolve, reject) => {
|
| 309 |
+
this.decoderWorker = new Worker('./decoder_worker.js');
|
| 310 |
+
|
| 311 |
+
this.decoderWorker.onmessage = (e) => {
|
| 312 |
+
const { type } = e.data;
|
| 313 |
+
|
| 314 |
+
switch (type) {
|
| 315 |
+
case 'ready':
|
| 316 |
+
this.decoderReady = true;
|
| 317 |
+
resolve();
|
| 318 |
+
break;
|
| 319 |
+
case 'error':
|
| 320 |
+
reject(new Error(e.data.message));
|
| 321 |
+
break;
|
| 322 |
+
case 'status':
|
| 323 |
+
break;
|
| 324 |
+
case 'transcript':
|
| 325 |
+
this.onTranscript?.(e.data.text, e.data.segmentId);
|
| 326 |
+
break;
|
| 327 |
+
case 'live_caption':
|
| 328 |
+
this.onLiveCaption?.(e.data.text);
|
| 329 |
+
break;
|
| 330 |
+
}
|
| 331 |
+
};
|
| 332 |
+
|
| 333 |
+
this.decoderWorker.postMessage({
|
| 334 |
+
type: 'init',
|
| 335 |
+
data: {
|
| 336 |
+
cfg: this.cfg,
|
| 337 |
+
onnxUrl: this.onnxUrl,
|
| 338 |
+
modelName: this.modelName
|
| 339 |
+
}
|
| 340 |
+
});
|
| 341 |
+
});
|
| 342 |
+
}
|
| 343 |
+
|
| 344 |
+
async start() {
|
| 345 |
+
if (this.running) return;
|
| 346 |
+
|
| 347 |
+
const stream = await navigator.mediaDevices.getUserMedia({
|
| 348 |
+
audio: {
|
| 349 |
+
sampleRate: SAMPLE_RATE,
|
| 350 |
+
channelCount: 1,
|
| 351 |
+
echoCancellation: false,
|
| 352 |
+
noiseSuppression: false,
|
| 353 |
+
autoGainControl: false
|
| 354 |
+
}
|
| 355 |
+
});
|
| 356 |
+
|
| 357 |
+
this.audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
|
| 358 |
+
|
| 359 |
+
// Check actual sample rate
|
| 360 |
+
console.log(`Requested sample rate: ${SAMPLE_RATE}, Actual: ${this.audioContext.sampleRate}`);
|
| 361 |
+
this.sourceNode = this.audioContext.createMediaStreamSource(stream);
|
| 362 |
+
|
| 363 |
+
// Use AudioWorklet for better performance
|
| 364 |
+
try {
|
| 365 |
+
await this.audioContext.audioWorklet.addModule('./audio_processor.js');
|
| 366 |
+
this.workletNode = new AudioWorkletNode(this.audioContext, 'audio-processor');
|
| 367 |
+
|
| 368 |
+
this.workletNode.port.onmessage = (e) => {
|
| 369 |
+
if (this.running) {
|
| 370 |
+
this.processAudioChunk(e.data.audio);
|
| 371 |
+
}
|
| 372 |
+
};
|
| 373 |
+
|
| 374 |
+
this.sourceNode.connect(this.workletNode);
|
| 375 |
+
this.workletNode.connect(this.audioContext.destination);
|
| 376 |
+
} catch (e) {
|
| 377 |
+
// Fallback to ScriptProcessor
|
| 378 |
+
console.warn('AudioWorklet not available, using ScriptProcessor');
|
| 379 |
+
const bufferSize = 2048;
|
| 380 |
+
this.scriptNode = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
|
| 381 |
+
|
| 382 |
+
this.scriptNode.onaudioprocess = (e) => {
|
| 383 |
+
if (this.running) {
|
| 384 |
+
const inputData = e.inputBuffer.getChannelData(0);
|
| 385 |
+
this.processAudioChunk(new Float32Array(inputData));
|
| 386 |
+
}
|
| 387 |
+
};
|
| 388 |
+
|
| 389 |
+
this.sourceNode.connect(this.scriptNode);
|
| 390 |
+
this.scriptNode.connect(this.audioContext.destination);
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
this.running = true;
|
| 394 |
+
this.state = 'idle';
|
| 395 |
+
this.onsetCounter = 0;
|
| 396 |
+
this.offsetCounter = 0;
|
| 397 |
+
this.emaProb = 0;
|
| 398 |
+
|
| 399 |
+
this.onStatusUpdate?.('listening', 'Listening...');
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
stop() {
|
| 403 |
+
this.running = false;
|
| 404 |
+
|
| 405 |
+
if (this.workletNode) {
|
| 406 |
+
this.workletNode.disconnect();
|
| 407 |
+
this.workletNode = null;
|
| 408 |
+
}
|
| 409 |
+
if (this.scriptNode) {
|
| 410 |
+
this.scriptNode.disconnect();
|
| 411 |
+
this.scriptNode = null;
|
| 412 |
+
}
|
| 413 |
+
if (this.sourceNode) {
|
| 414 |
+
this.sourceNode.disconnect();
|
| 415 |
+
this.sourceNode = null;
|
| 416 |
+
}
|
| 417 |
+
if (this.audioContext) {
|
| 418 |
+
this.audioContext.close();
|
| 419 |
+
this.audioContext = null;
|
| 420 |
+
}
|
| 421 |
+
if (this.vad) {
|
| 422 |
+
this.vad.destroy();
|
| 423 |
+
}
|
| 424 |
+
if (this.encoderWorker) {
|
| 425 |
+
this.encoderWorker.terminate();
|
| 426 |
+
this.encoderWorker = null;
|
| 427 |
+
}
|
| 428 |
+
if (this.decoderWorker) {
|
| 429 |
+
this.decoderWorker.terminate();
|
| 430 |
+
this.decoderWorker = null;
|
| 431 |
+
}
|
| 432 |
+
|
| 433 |
+
this.onStatusUpdate?.('idle', 'Stopped');
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
processAudioChunk(audioData) {
|
| 437 |
+
// Accumulate for VAD (10ms chunks)
|
| 438 |
+
this.vadBuffer.push(...audioData);
|
| 439 |
+
|
| 440 |
+
// Accumulate for ASR (20ms chunks)
|
| 441 |
+
this.asrBuffer.push(...audioData);
|
| 442 |
+
|
| 443 |
+
// Process VAD chunks
|
| 444 |
+
while (this.vadBuffer.length >= VAD_CHUNK_SAMPLES) {
|
| 445 |
+
const vadChunk = new Float32Array(this.vadBuffer.splice(0, VAD_CHUNK_SAMPLES));
|
| 446 |
+
const prob = this.vad.process(vadChunk);
|
| 447 |
+
|
| 448 |
+
if (prob >= 0) {
|
| 449 |
+
this.emaProb = this.emaAlpha * prob + (1 - this.emaAlpha) * this.emaProb;
|
| 450 |
+
|
| 451 |
+
// Throttle display updates (every 50ms instead of 10ms)
|
| 452 |
+
this.vadUpdateCounter++;
|
| 453 |
+
if (this.vadUpdateCounter >= this.vadUpdateInterval) {
|
| 454 |
+
this.vadUpdateCounter = 0;
|
| 455 |
+
this.vadHistory.push(this.emaProb);
|
| 456 |
+
if (this.vadHistory.length > 100) this.vadHistory.shift();
|
| 457 |
+
this.onVadUpdate?.(this.emaProb, this.vadHistory);
|
| 458 |
+
}
|
| 459 |
+
|
| 460 |
+
this.updateSegmentState();
|
| 461 |
+
}
|
| 462 |
+
}
|
| 463 |
+
|
| 464 |
+
// Extract complete ASR chunks
|
| 465 |
+
while (this.asrBuffer.length >= ASR_CHUNK_SAMPLES) {
|
| 466 |
+
const chunkData = this.asrBuffer.splice(0, ASR_CHUNK_SAMPLES);
|
| 467 |
+
const chunk = new Float32Array(chunkData);
|
| 468 |
+
|
| 469 |
+
if (this.state === 'speech') {
|
| 470 |
+
this.sendAudioToEncoder(chunk);
|
| 471 |
+
} else {
|
| 472 |
+
this.preBuffer.push(chunk);
|
| 473 |
+
if (this.preBuffer.length > PRE_BUFFER_CHUNKS) {
|
| 474 |
+
this.preBuffer.shift();
|
| 475 |
+
}
|
| 476 |
+
|
| 477 |
+
if (this.postBufferRemaining > 0) {
|
| 478 |
+
this.sendAudioToEncoder(chunk);
|
| 479 |
+
this.postBufferRemaining--;
|
| 480 |
+
|
| 481 |
+
if (this.postBufferRemaining === 0) {
|
| 482 |
+
this.finalizeSegmentEnd();
|
| 483 |
+
}
|
| 484 |
+
}
|
| 485 |
+
}
|
| 486 |
+
}
|
| 487 |
+
}
|
| 488 |
+
|
| 489 |
+
sendAudioToEncoder(chunk, flush = false) {
|
| 490 |
+
if (!this.encoderWorker || !this.encoderReady) return;
|
| 491 |
+
|
| 492 |
+
// Accumulate chunks into batch buffer
|
| 493 |
+
this.encoderBatchBuffer.push(...chunk);
|
| 494 |
+
|
| 495 |
+
// Send when we have 320ms worth of audio, or on flush
|
| 496 |
+
if (this.encoderBatchBuffer.length >= ENCODER_BATCH_SAMPLES || flush) {
|
| 497 |
+
if (this.encoderBatchBuffer.length > 0) {
|
| 498 |
+
const batch = new Float32Array(this.encoderBatchBuffer);
|
| 499 |
+
this.encoderBatchBuffer = [];
|
| 500 |
+
|
| 501 |
+
this.encoderWorker.postMessage({
|
| 502 |
+
type: 'audio',
|
| 503 |
+
data: {
|
| 504 |
+
audio: batch,
|
| 505 |
+
segmentId: this.currentSegmentId
|
| 506 |
+
}
|
| 507 |
+
}, [batch.buffer]);
|
| 508 |
+
}
|
| 509 |
+
}
|
| 510 |
+
}
|
| 511 |
+
|
| 512 |
+
updateSegmentState() {
|
| 513 |
+
if (this.state === 'idle') {
|
| 514 |
+
if (this.emaProb >= this.onsetThreshold) {
|
| 515 |
+
this.onsetCounter++;
|
| 516 |
+
if (this.onsetCounter >= 2) {
|
| 517 |
+
this.startSegment();
|
| 518 |
+
}
|
| 519 |
+
} else {
|
| 520 |
+
this.onsetCounter = 0;
|
| 521 |
+
}
|
| 522 |
+
} else if (this.state === 'speech') {
|
| 523 |
+
if (this.emaProb < this.offsetThreshold) {
|
| 524 |
+
this.offsetCounter++;
|
| 525 |
+
if (this.offsetCounter >= 3) {
|
| 526 |
+
this.endSegment();
|
| 527 |
+
}
|
| 528 |
+
} else {
|
| 529 |
+
this.offsetCounter = 0;
|
| 530 |
+
}
|
| 531 |
+
}
|
| 532 |
+
}
|
| 533 |
+
|
| 534 |
+
startSegment() {
|
| 535 |
+
this.currentSegmentId++;
|
| 536 |
+
this.state = 'speech';
|
| 537 |
+
this.onsetCounter = 0;
|
| 538 |
+
this.offsetCounter = 0;
|
| 539 |
+
this.encoderBatchBuffer = []; // Reset batch buffer for new segment
|
| 540 |
+
|
| 541 |
+
// Tell encoder to start new segment
|
| 542 |
+
this.encoderWorker?.postMessage({
|
| 543 |
+
type: 'segment_start',
|
| 544 |
+
data: { segmentId: this.currentSegmentId }
|
| 545 |
+
});
|
| 546 |
+
|
| 547 |
+
// Drain pre-buffer
|
| 548 |
+
while (this.preBuffer.length > 0) {
|
| 549 |
+
const chunk = this.preBuffer.shift();
|
| 550 |
+
this.sendAudioToEncoder(chunk);
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
this.onStatusUpdate?.('recording', 'Recording...');
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
endSegment() {
|
| 557 |
+
this.state = 'idle';
|
| 558 |
+
this.offsetCounter = 0;
|
| 559 |
+
this.postBufferRemaining = POST_BUFFER_CHUNKS;
|
| 560 |
+
|
| 561 |
+
if (this.postBufferRemaining === 0) {
|
| 562 |
+
this.finalizeSegmentEnd();
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
+
this.onStatusUpdate?.('listening', 'Listening...');
|
| 566 |
+
}
|
| 567 |
+
|
| 568 |
+
finalizeSegmentEnd() {
|
| 569 |
+
// Process remaining complete chunks
|
| 570 |
+
while (this.asrBuffer.length >= ASR_CHUNK_SAMPLES) {
|
| 571 |
+
const chunkData = this.asrBuffer.splice(0, ASR_CHUNK_SAMPLES);
|
| 572 |
+
const chunk = new Float32Array(chunkData);
|
| 573 |
+
this.sendAudioToEncoder(chunk);
|
| 574 |
+
}
|
| 575 |
+
|
| 576 |
+
// Pad and send partial chunk
|
| 577 |
+
if (this.asrBuffer.length > 0) {
|
| 578 |
+
const padded = new Float32Array(ASR_CHUNK_SAMPLES);
|
| 579 |
+
padded.set(this.asrBuffer);
|
| 580 |
+
this.sendAudioToEncoder(padded);
|
| 581 |
+
}
|
| 582 |
+
this.asrBuffer = [];
|
| 583 |
+
|
| 584 |
+
// Flush any remaining audio in the batch buffer
|
| 585 |
+
this.sendAudioToEncoder(new Float32Array(0), true);
|
| 586 |
+
|
| 587 |
+
// Signal segment end
|
| 588 |
+
this.encoderWorker?.postMessage({
|
| 589 |
+
type: 'segment_end',
|
| 590 |
+
data: { segmentId: this.currentSegmentId }
|
| 591 |
+
});
|
| 592 |
+
}
|
| 593 |
+
}
|
| 594 |
+
|
| 595 |
+
// =============================================================================
|
| 596 |
+
// UI Controller
|
| 597 |
+
// =============================================================================
|
| 598 |
+
|
| 599 |
+
class ASRDemoUI {
|
| 600 |
+
constructor() {
|
| 601 |
+
this.asr = null;
|
| 602 |
+
this.vadCanvas = null;
|
| 603 |
+
this.vadCtx = null;
|
| 604 |
+
|
| 605 |
+
this.initElements();
|
| 606 |
+
this.initCanvas();
|
| 607 |
+
this.bindEvents();
|
| 608 |
+
}
|
| 609 |
+
|
| 610 |
+
initElements() {
|
| 611 |
+
this.loadingOverlay = document.getElementById('loadingOverlay');
|
| 612 |
+
this.loadingText = document.getElementById('loadingText');
|
| 613 |
+
this.errorMessage = document.getElementById('errorMessage');
|
| 614 |
+
this.statusDot = document.getElementById('statusDot');
|
| 615 |
+
this.statusText = document.getElementById('statusText');
|
| 616 |
+
this.startBtn = document.getElementById('startBtn');
|
| 617 |
+
this.stopBtn = document.getElementById('stopBtn');
|
| 618 |
+
this.vadBarFill = document.getElementById('vadBarFill');
|
| 619 |
+
this.vadValue = document.getElementById('vadValue');
|
| 620 |
+
this.audioQueueSize = document.getElementById('audioQueueSize');
|
| 621 |
+
this.featuresQueueSize = document.getElementById('featuresQueueSize');
|
| 622 |
+
this.droppedChunksEl = document.getElementById('droppedChunks');
|
| 623 |
+
this.transcriptsList = document.getElementById('transcriptsList');
|
| 624 |
+
this.liveCaption = document.getElementById('liveCaption');
|
| 625 |
+
        this.liveCaptionText = document.getElementById('liveCaptionText');
        this.modelSelect = document.getElementById('modelSelect');
        this.onnxUrl = document.getElementById('onnxUrl');
        this.onsetThreshold = document.getElementById('onsetThreshold');
        this.offsetThreshold = document.getElementById('offsetThreshold');
    }

    initCanvas() {
        this.vadCanvas = document.getElementById('vadCanvas');
        this.vadCtx = this.vadCanvas.getContext('2d');

        // Match the canvas backing store to its CSS size for crisp rendering on HiDPI screens
        const rect = this.vadCanvas.getBoundingClientRect();
        this.vadCanvas.width = rect.width * window.devicePixelRatio;
        this.vadCanvas.height = rect.height * window.devicePixelRatio;
        this.vadCtx.scale(window.devicePixelRatio, window.devicePixelRatio);
    }

    bindEvents() {
        this.startBtn.addEventListener('click', () => this.handleStart());
        this.stopBtn.addEventListener('click', () => this.handleStop());
    }

    async handleStart() {
        try {
            this.showLoading('Initializing...');

            const config = {
                modelName: this.modelSelect.value,
                onnxUrl: this.onnxUrl.value || './models',
                onsetThreshold: parseFloat(this.onsetThreshold.value),
                offsetThreshold: parseFloat(this.offsetThreshold.value)
            };

            this.asr = new PipelinedStreamingASR(config);

            this.asr.onVadUpdate = (prob, history) => this.updateVadDisplay(prob, history);
            this.asr.onTranscript = (text, segmentId) => this.addTranscript(text, segmentId);
            this.asr.onLiveCaption = (text) => this.updateLiveCaption(text);
            this.asr.onStatusUpdate = (status, text) => this.updateStatus(status, text);

            await this.asr.loadModels((text) => {
                this.loadingText.textContent = text;
            });

            await this.asr.start();

            this.hideLoading();
            this.startBtn.disabled = true;
            this.stopBtn.disabled = false;
            this.disableConfig(true);

        } catch (error) {
            console.error('Start error:', error);
            this.hideLoading();
            this.showError(`Failed to start: ${error.message}`);
        }
    }

    handleStop() {
        if (this.asr) {
            this.asr.stop();
            this.asr = null;
        }

        this.startBtn.disabled = false;
        this.stopBtn.disabled = true;
        this.disableConfig(false);
        this.updateStatus('idle', 'Ready');
    }

    updateVadDisplay(prob, history) {
        this.vadBarFill.style.width = `${prob * 100}%`;
        this.vadValue.textContent = `${Math.round(prob * 100)}%`;

        const ctx = this.vadCtx;
        const rect = this.vadCanvas.getBoundingClientRect();
        const width = rect.width;
        const height = rect.height;

        ctx.fillStyle = '#0f0f23';
        ctx.fillRect(0, 0, width, height);

        if (history.length < 2) return;

        const onsetY = height * (1 - parseFloat(this.onsetThreshold.value));
        const offsetY = height * (1 - parseFloat(this.offsetThreshold.value));

        // Threshold guide lines; reset lineWidth so the 2px trace width from the
        // previous frame does not leak into the guides
        ctx.lineWidth = 1;
        ctx.strokeStyle = '#ff444466';
        ctx.beginPath();
        ctx.moveTo(0, onsetY);
        ctx.lineTo(width, onsetY);
        ctx.stroke();

        ctx.strokeStyle = '#00ff8866';
        ctx.beginPath();
        ctx.moveTo(0, offsetY);
        ctx.lineTo(width, offsetY);
        ctx.stroke();

        // VAD probability trace
        ctx.strokeStyle = '#00d4ff';
        ctx.lineWidth = 2;
        ctx.beginPath();

        for (let i = 0; i < history.length; i++) {
            const x = (i / (history.length - 1)) * width;
            const y = height * (1 - history[i]);
            if (i === 0) {
                ctx.moveTo(x, y);
            } else {
                ctx.lineTo(x, y);
            }
        }
        ctx.stroke();
    }

    addTranscript(text, segmentId) {
        if (!text || !text.trim()) return;

        const item = document.createElement('div');
        item.className = 'transcript-item';
        item.innerHTML = `
            <span class="transcript-duration">#${segmentId}</span>
            <span class="transcript-text">${this.escapeHtml(text)}</span>
        `;
        this.transcriptsList.appendChild(item);
        this.transcriptsList.scrollTop = this.transcriptsList.scrollHeight;
    }

    updateLiveCaption(text) {
        if (text) {
            this.liveCaptionText.textContent = text;
            this.liveCaptionText.classList.remove('placeholder');
            this.liveCaption.classList.add('active');
        } else {
            this.liveCaptionText.textContent = 'Waiting for speech...';
            this.liveCaptionText.classList.add('placeholder');
            this.liveCaption.classList.remove('active');
        }
    }

    updateStatus(status, text) {
        this.statusDot.className = 'status-dot ' + status;
        this.statusText.textContent = text;
    }

    showLoading(text) {
        this.loadingText.textContent = text;
        this.loadingOverlay.classList.remove('hidden');
    }

    hideLoading() {
        this.loadingOverlay.classList.add('hidden');
    }

    showError(message) {
        this.errorMessage.textContent = message;
        this.errorMessage.classList.add('visible');
    }

    disableConfig(disabled) {
        this.modelSelect.disabled = disabled;
        this.onnxUrl.disabled = disabled;
        this.onsetThreshold.disabled = disabled;
        this.offsetThreshold.disabled = disabled;
    }

    escapeHtml(text) {
        // Use the browser's own serializer to escape user text before it is placed in innerHTML
        const div = document.createElement('div');
        div.textContent = text;
        return div.innerHTML;
    }
}

// Initialize on page load
document.addEventListener('DOMContentLoaded', () => {
    window.asrDemo = new ASRDemoUI();
});
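Because ASRDemoUI exposes its update handlers as plain methods, they can be exercised from the browser console without a microphone or the ONNX models. The snippet below is an illustrative smoke test, not part of the upload; it only calls methods defined above, and the status value and text are made-up examples.

// Illustrative smoke test (not part of the upload): drive the UI handlers directly
// to check the VAD canvas, live caption, and transcript list rendering.
const demo = window.asrDemo;  // created by the DOMContentLoaded hook above
const fakeHistory = Array.from({ length: 100 }, (_, i) => 0.5 + 0.4 * Math.sin(i / 8));
demo.updateVadDisplay(fakeHistory[fakeHistory.length - 1], fakeHistory);
demo.updateLiveCaption('hello wor');
demo.addTranscript('hello world', 1);
demo.updateStatus('listening', 'Listening...');  // hypothetical status class/text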
ten_vad.js
ADDED
|
@@ -0,0 +1,30 @@
| 1 |
+
|
| 2 |
+
var createVADModule = (() => {
|
| 3 |
+
var _scriptDir = import.meta.url;
|
| 4 |
+
|
| 5 |
+
return (
|
| 6 |
+
function(createVADModule) {
|
| 7 |
+
createVADModule = createVADModule || {};
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w;
|
| 11 |
+
if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary);
|
| 12 |
+
var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[];
|
| 13 |
+
function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href;
|
| 14 |
+
function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0<b.length;)b.shift()(a)}
|
| 15 |
+
var da=[null,[],[]],ea={a:function(){z("")},f:function(b,c,m){E.copyWithin(b,c,c+m)},c:function(b){var c=E.length;b>>>=0;if(2147483648<b)return!1;for(var m=1;4>=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e<m;e++){var W=F[c>>2],
|
| 16 |
+
X=F[c+4>>2];c+=8;for(var G=0;G<X;G++){var f=E[W+G],H=da[b];if(0===f||10===f){f=H;for(var n=0,q=n+NaN,t=n;f[t]&&!(t>=q);)++t;if(16<t-n&&f.buffer&&C)f=C.decode(f.subarray(n,t));else{for(q="";n<t;){var g=f[n++];if(g&128){var I=f[n++]&63;if(192==(g&224))q+=String.fromCharCode((g&31)<<6|I);else{var Y=f[n++]&63;g=224==(g&240)?(g&15)<<12|I<<6|Y:(g&7)<<18|I<<12|Y<<6|f[n++]&63;65536>g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1===
|
| 17 |
+
b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}};
|
| 18 |
+
(function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h,
|
| 19 |
+
b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})();
|
| 20 |
+
a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)};
|
| 21 |
+
a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)};
|
| 22 |
+
function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0<N)){if(a.preRun)for("function"==typeof a.preRun&&(a.preRun=[a.preRun]);a.preRun.length;)ba();U(K);0<N||(a.setStatus?(a.setStatus("Running..."),setTimeout(function(){setTimeout(function(){a.setStatus("")},1);b()},1)):b())}}
|
| 23 |
+
if(a.preInit)for("function"==typeof a.preInit&&(a.preInit=[a.preInit]);0<a.preInit.length;)a.preInit.pop()();Z();
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
return createVADModule.ready
|
| 27 |
+
}
|
| 28 |
+
);
|
| 29 |
+
})();
|
| 30 |
+
export default createVADModule;
|
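The file above is Emscripten-generated glue: calling createVADModule() returns a promise that resolves to the instantiated module, which exposes _malloc, _free, _ten_vad_create, _ten_vad_process, _ten_vad_destroy, _ten_vad_get_version, and the usual HEAP views. Below is a hedged usage sketch; the export names come from the glue itself, but the argument order follows the upstream TEN VAD C API, and the hop size and threshold values are assumptions, not something this upload specifies.

// Hypothetical usage sketch (not part of the upload).
// Assumed signatures, mirroring the upstream TEN VAD C API:
//   ten_vad_create(handle_ptr, hop_size, threshold)
//   ten_vad_process(handle, pcm_ptr, n_samples, prob_ptr, flag_ptr)
import createVADModule from './ten_vad.js';

const vad = await createVADModule();          // resolves once ten_vad.wasm is instantiated
const HOP = 256;                              // assumed hop size in samples at 16 kHz

const handlePtr = vad._malloc(4);             // ten_vad_handle_t*
vad._ten_vad_create(handlePtr, HOP, 0.5);     // 0.5 = assumed voice threshold
const handle = vad.HEAP32[handlePtr >> 2];

const pcmPtr = vad._malloc(HOP * 2);          // int16_t input buffer
const probPtr = vad._malloc(4);               // float out: speech probability
const flagPtr = vad._malloc(4);               // int out: speech/non-speech flag

function processHop(int16Samples /* Int16Array of length HOP */) {
  vad.HEAP16.set(int16Samples, pcmPtr >> 1);
  vad._ten_vad_process(handle, pcmPtr, HOP, probPtr, flagPtr);
  return {
    probability: vad.HEAPF32[probPtr >> 2],
    isSpeech: vad.HEAP32[flagPtr >> 2] === 1
  };
}
// When finished: vad._ten_vad_destroy(handlePtr) and vad._free(...) the buffers.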
ten_vad.wasm
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1ec0b9640683987e15a4e54e4ce5642b2447c6e5d82b1be889b5099c75434fc3
size 283349