Commit eb30231
Parent(s): a2590ba
Refactor model loading in app.py and update TTS version in requirements.txt
- app.py +9 -12
- requirements.txt +1 -1
app.py CHANGED
@@ -28,23 +28,20 @@ if not speaker_audio_path.exists():
 
 config_path = str(config_path)
 vocab_path = str(vocab_path)
-model_path = str(model_path)
+model_path = str(model_path)
 speaker_audio_path = str(speaker_audio_path)
 
-
+config = XttsConfig()
+config.load_json(config_path)
+
+print("Loading model...")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = Xtts.init_from_config(config)
+model.load_checkpoint(config, checkpoint_path=model_path, use_deepspeed=True, vocab_path=vocab_path, eval=True)
+model.to(device)
 
 @spaces.GPU
 def infer_EGTTS(text: str, speaker_audio_path: str, temperature: float = 0.75):
-    global model
-    if model is None:
-        print("Loading model...")
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        config = XttsConfig()
-        config.load_json(config_path)
-        model = Xtts.init_from_config(config)
-        model.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=True, vocab_path=vocab_path)
-        model.to(device)
-
     print("Computing speaker latents...")
     gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[speaker_audio_path])
 
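For context, a minimal sketch of how the now module-level model is typically consumed inside the GPU-decorated handler. The model.inference call, the "ar" language code, and the (sample_rate, waveform) return value below are assumptions based on the Coqui XTTS API and are not part of this commit; model, config_path, and the other globals come from app.py above.

# Usage sketch (assumed, not taken from this commit): with the model loaded
# eagerly at import time, the handler only computes speaker latents and runs
# XTTS inference. `model` is the module-level Xtts instance from app.py.
import spaces  # Hugging Face Spaces ZeroGPU decorator

@spaces.GPU
def infer_EGTTS(text: str, speaker_audio_path: str, temperature: float = 0.75):
    print("Computing speaker latents...")
    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
        audio_path=[speaker_audio_path]
    )

    # XTTS inference returns a dict; "wav" holds the synthesized 24 kHz waveform.
    out = model.inference(
        text,
        "ar",  # assumed language code for this Egyptian-Arabic TTS Space
        gpt_cond_latent,
        speaker_embedding,
        temperature=temperature,
    )
    return 24000, out["wav"]  # e.g. a (sample_rate, waveform) pair for a Gradio Audio output

The refactor itself simply trades the previous lazy, per-request initialization guard for a single eager load when the module is imported.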
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-git+https://github.com/coqui-ai/TTS
+TTS @ git+https://github.com/coqui-ai/TTS@v0.21.1
 transformers
 deepspeed
 triton
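The new requirement is a PEP 508 direct reference pinned to the v0.21.1 tag instead of an unpinned VCS URL, so builds no longer track whatever is on the default branch. A small, illustrative sanity check (not part of the repo), assuming the tag installs as distribution version 0.21.1 under the name "TTS":

# Illustrative check (assumption: the v0.21.1 tag installs as version "0.21.1"
# under the distribution name "TTS").
from importlib.metadata import version

installed = version("TTS")
print("Installed Coqui TTS version:", installed)
assert installed == "0.21.1", f"Expected TTS 0.21.1, got {installed}"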