Spaces:
Runtime error
Runtime error
Florian Lux
committed on
Commit
·
49696ae
1
Parent(s):
c8c05d4
try it one more time with speaker
Browse files
- .gitignore +2 -1
- app.py +13 -11
- packages.txt +1 -0
.gitignore
CHANGED
|
@@ -11,4 +11,5 @@ audios/
|
|
| 11 |
*playground*
|
| 12 |
*.json
|
| 13 |
.tmp/
|
| 14 |
-
.vscode/
|
|
|
|
|
|
| 11 |
*playground*
|
| 12 |
*.json
|
| 13 |
.tmp/
|
| 14 |
+
.vscode/
|
| 15 |
+
Models/
|
app.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
-
import gdown
|
| 4 |
import gradio as gr
|
| 5 |
import numpy as np
|
| 6 |
import torch
|
| 7 |
|
| 8 |
from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2
|
| 9 |
-
import os
|
| 10 |
|
| 11 |
os.system("pip uninstall -y gradio")
|
| 12 |
-
os.system("pip install gradio==2.7.5")
|
|
|
|
| 13 |
|
| 14 |
def float2pcm(sig, dtype='int16'):
|
| 15 |
"""
|
|
@@ -30,16 +29,10 @@ def float2pcm(sig, dtype='int16'):
|
|
| 30 |
class TTS_Interface:
|
| 31 |
|
| 32 |
def __init__(self):
|
| 33 |
-
os.makedirs("Models/HiFiGAN_combined", exist_ok=True)
|
| 34 |
-
os.makedirs("Models/FastSpeech2_Meta", exist_ok=True)
|
| 35 |
-
if not os.path.exists("Models/FastSpeech2_Meta/best.pt"):
|
| 36 |
-
gdown.download(id="1-AhjmCR6DDI6rtzPIn9ksOxQyHKf6CbG", output="Models/FastSpeech2_Meta/best.pt")
|
| 37 |
-
if not os.path.exists("Models/HiFiGAN_combined/best.pt"):
|
| 38 |
-
gdown.download(id="1-5sP-0JDUvKTjxhO3hUVJgArSUjuhU6P", output="Models/HiFiGAN_combined/best.pt")
|
| 39 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 40 |
self.model = Meta_FastSpeech2(device=self.device)
|
| 41 |
|
| 42 |
-
def read(self, prompt, language):
|
| 43 |
language_id_lookup = {
|
| 44 |
"English" : "en",
|
| 45 |
"German" : "de",
|
|
@@ -52,6 +45,11 @@ class TTS_Interface:
|
|
| 52 |
"French" : "fr"
|
| 53 |
}
|
| 54 |
self.model.set_language(language_id_lookup[language])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
wav = self.model(prompt)
|
| 56 |
return 48000, float2pcm(wav.cpu().numpy())
|
| 57 |
|
|
@@ -69,7 +67,11 @@ iface = gr.Interface(fn=meta_model.read,
|
|
| 69 |
'Russian',
|
| 70 |
'Hungarian',
|
| 71 |
'Dutch',
|
| 72 |
-
'French'], type="value", default='English', label="Language Selection")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
outputs=gr.outputs.Audio(type="numpy", label=None),
|
| 74 |
layout="vertical",
|
| 75 |
title="IMS Toucan Multilingual Multispeaker Demo",
|
|
|
|
| 1 |
import os
|
| 2 |
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
import numpy as np
|
| 5 |
import torch
|
| 6 |
|
| 7 |
from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2
|
|
|
|
| 8 |
|
| 9 |
os.system("pip uninstall -y gradio")
|
| 10 |
+
os.system("pip install gradio==2.7.5.2")
|
| 11 |
+
|
| 12 |
|
| 13 |
def float2pcm(sig, dtype='int16'):
|
| 14 |
"""
|
|
|
|
| 29 |
class TTS_Interface:
|
| 30 |
|
| 31 |
def __init__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 33 |
self.model = Meta_FastSpeech2(device=self.device)
|
| 34 |
|
| 35 |
+
def read(self, prompt, language, path_to_audio):
|
| 36 |
language_id_lookup = {
|
| 37 |
"English" : "en",
|
| 38 |
"German" : "de",
|
|
|
|
| 45 |
"French" : "fr"
|
| 46 |
}
|
| 47 |
self.model.set_language(language_id_lookup[language])
|
| 48 |
+
if path_to_audio is not None:
|
| 49 |
+
try:
|
| 50 |
+
self.model.set_utterance_embedding(path_to_audio)
|
| 51 |
+
except RuntimeError:
|
| 52 |
+
pass
|
| 53 |
wav = self.model(prompt)
|
| 54 |
return 48000, float2pcm(wav.cpu().numpy())
|
| 55 |
|
|
|
|
| 67 |
'Russian',
|
| 68 |
'Hungarian',
|
| 69 |
'Dutch',
|
| 70 |
+
'French'], type="value", default='English', label="Language Selection"),
|
| 71 |
+
gr.inputs.Audio(source="microphone",
|
| 72 |
+
optional=True,
|
| 73 |
+
label="Make the TTS imitate your Voice (optional, press once to start recording and again to stop)",
|
| 74 |
+
type="filepath")],
|
| 75 |
outputs=gr.outputs.Audio(type="numpy", label=None),
|
| 76 |
layout="vertical",
|
| 77 |
title="IMS Toucan Multilingual Multispeaker Demo",
|
packages.txt
CHANGED
|
@@ -1,2 +1,3 @@
|
|
| 1 |
libsndfile1
|
| 2 |
espeak-ng
|
|
|
|
|
|
| 1 |
libsndfile1
|
| 2 |
espeak-ng
|
| 3 |
+
ffmpeg
|