Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ from zonos.conditioning import make_cond_dict
|
|
| 7 |
|
| 8 |
# Global cache to hold the loaded model
|
| 9 |
MODEL = None
|
|
|
|
| 10 |
|
| 11 |
def load_model():
|
| 12 |
"""
|
|
@@ -50,14 +51,14 @@ def tts(text, speaker_audio):
|
|
| 50 |
# Get speaker embedding
|
| 51 |
with torch.no_grad():
|
| 52 |
spk_embedding = model.make_speaker_embedding(wav_tensor, sr)
|
| 53 |
-
spk_embedding = spk_embedding.to(
|
| 54 |
|
| 55 |
# Prepare conditioning dictionary
|
| 56 |
cond_dict = make_cond_dict(
|
| 57 |
text=text, # The text prompt
|
| 58 |
speaker=spk_embedding, # Speaker embedding from reference audio
|
| 59 |
language="en-us", # Hard-coded language or switch to another if needed
|
| 60 |
-
device=
|
| 61 |
)
|
| 62 |
conditioning = model.prepare_conditioning(cond_dict)
|
| 63 |
|
|
|
|
| 7 |
|
| 8 |
# Global cache to hold the loaded model
|
| 9 |
MODEL = None
|
| 10 |
+
device = "cuda"
|
| 11 |
|
| 12 |
def load_model():
|
| 13 |
"""
|
|
|
|
| 51 |
# Get speaker embedding
|
| 52 |
with torch.no_grad():
|
| 53 |
spk_embedding = model.make_speaker_embedding(wav_tensor, sr)
|
| 54 |
+
spk_embedding = spk_embedding.to(device, dtype=torch.bfloat16)
|
| 55 |
|
| 56 |
# Prepare conditioning dictionary
|
| 57 |
cond_dict = make_cond_dict(
|
| 58 |
text=text, # The text prompt
|
| 59 |
speaker=spk_embedding, # Speaker embedding from reference audio
|
| 60 |
language="en-us", # Hard-coded language or switch to another if needed
|
| 61 |
+
device=device,
|
| 62 |
)
|
| 63 |
conditioning = model.prepare_conditioning(cond_dict)
|
| 64 |
|