Spaces:
Sleeping
Sleeping
mrfakename
committed on
Commit
·
addff22
1
Parent(s):
5cf7b18
Gruut doesn't work
Browse files
- app.py +4 -4
- styletts2importable.py +6 -18
app.py
CHANGED
@@ -16,13 +16,13 @@ voices = {}
|
|
16 |
# else:
|
17 |
for v in voicelist:
|
18 |
voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
|
19 |
-
def synthesize(text, voice
|
20 |
if text.strip() == "":
|
21 |
raise gr.Error("You must enter some text")
|
22 |
if len(text) > 300:
|
23 |
raise gr.Error("Text must be under 300 characters")
|
24 |
v = voice.lower()
|
25 |
-
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1
|
26 |
def clsynthesize(text, voice):
|
27 |
if text.strip() == "":
|
28 |
raise gr.Error("You must enter some text")
|
@@ -43,11 +43,11 @@ with gr.Blocks() as vctk:
|
|
43 |
with gr.Column(scale=1):
|
44 |
inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
45 |
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
46 |
-
use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
|
47 |
with gr.Column(scale=1):
|
48 |
btn = gr.Button("Synthesize", variant="primary")
|
49 |
audio = gr.Audio(interactive=False, label="Synthesized Audio")
|
50 |
-
btn.click(synthesize, inputs=[inp, voice
|
51 |
with gr.Blocks() as clone:
|
52 |
with gr.Row():
|
53 |
with gr.Column(scale=1):
|
|
|
16 |
# else:
|
17 |
for v in voicelist:
|
18 |
voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
|
19 |
+
def synthesize(text, voice):
|
20 |
if text.strip() == "":
|
21 |
raise gr.Error("You must enter some text")
|
22 |
if len(text) > 300:
|
23 |
raise gr.Error("Text must be under 300 characters")
|
24 |
v = voice.lower()
|
25 |
+
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
26 |
def clsynthesize(text, voice):
|
27 |
if text.strip() == "":
|
28 |
raise gr.Error("You must enter some text")
|
|
|
43 |
with gr.Column(scale=1):
|
44 |
inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
45 |
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
46 |
+
# use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
|
47 |
with gr.Column(scale=1):
|
48 |
btn = gr.Button("Synthesize", variant="primary")
|
49 |
audio = gr.Audio(interactive=False, label="Synthesized Audio")
|
50 |
+
btn.click(synthesize, inputs=[inp, voice], outputs=[audio], concurrency_limit=4)
|
51 |
with gr.Blocks() as clone:
|
52 |
with gr.Row():
|
53 |
with gr.Column(scale=1):
|
styletts2importable.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from cached_path import cached_path
|
2 |
-
print("GRUUT")
|
3 |
-
from gruut_phonemize import gphonemize
|
4 |
|
5 |
# from dp.phonemizer import Phonemizer
|
6 |
print("NLTK")
|
@@ -135,10 +135,7 @@ sampler = DiffusionSampler(
|
|
135 |
|
136 |
def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
|
137 |
text = text.strip()
|
138 |
-
|
139 |
-
ps = gphonemize(text)
|
140 |
-
else:
|
141 |
-
ps = global_phonemizer.phonemize([text])
|
142 |
ps = word_tokenize(ps[0])
|
143 |
ps = ' '.join(ps)
|
144 |
tokens = textclenaer(ps)
|
@@ -207,10 +204,7 @@ def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding
|
|
207 |
|
208 |
def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
|
209 |
text = text.strip()
|
210 |
-
|
211 |
-
ps = gphonemize(text)
|
212 |
-
else:
|
213 |
-
ps = global_phonemizer.phonemize([text])
|
214 |
ps = word_tokenize(ps[0])
|
215 |
ps = ' '.join(ps)
|
216 |
ps = ps.replace('``', '"')
|
@@ -287,10 +281,7 @@ def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion
|
|
287 |
|
288 |
def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
|
289 |
text = text.strip()
|
290 |
-
|
291 |
-
ps = gphonemize(text)
|
292 |
-
else:
|
293 |
-
ps = global_phonemizer.phonemize([text])
|
294 |
ps = word_tokenize(ps[0])
|
295 |
ps = ' '.join(ps)
|
296 |
|
@@ -299,10 +290,7 @@ def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=
|
|
299 |
tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
|
300 |
|
301 |
ref_text = ref_text.strip()
|
302 |
-
|
303 |
-
ps = gphonemize(text)
|
304 |
-
else:
|
305 |
-
ps = global_phonemizer.phonemize([ref_text])
|
306 |
ps = word_tokenize(ps[0])
|
307 |
ps = ' '.join(ps)
|
308 |
|
|
|
1 |
from cached_path import cached_path
|
2 |
+
# print("GRUUT")
|
3 |
+
# from gruut_phonemize import gphonemize
|
4 |
|
5 |
# from dp.phonemizer import Phonemizer
|
6 |
print("NLTK")
|
|
|
135 |
|
136 |
def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
|
137 |
text = text.strip()
|
138 |
+
ps = global_phonemizer.phonemize([text])
|
|
|
|
|
|
|
139 |
ps = word_tokenize(ps[0])
|
140 |
ps = ' '.join(ps)
|
141 |
tokens = textclenaer(ps)
|
|
|
204 |
|
205 |
def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
|
206 |
text = text.strip()
|
207 |
+
ps = global_phonemizer.phonemize([text])
|
|
|
|
|
|
|
208 |
ps = word_tokenize(ps[0])
|
209 |
ps = ' '.join(ps)
|
210 |
ps = ps.replace('``', '"')
|
|
|
281 |
|
282 |
def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1, use_gruut=False):
|
283 |
text = text.strip()
|
284 |
+
ps = global_phonemizer.phonemize([text])
|
|
|
|
|
|
|
285 |
ps = word_tokenize(ps[0])
|
286 |
ps = ' '.join(ps)
|
287 |
|
|
|
290 |
tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
|
291 |
|
292 |
ref_text = ref_text.strip()
|
293 |
+
ps = global_phonemizer.phonemize([ref_text])
|
|
|
|
|
|
|
294 |
ps = word_tokenize(ps[0])
|
295 |
ps = ' '.join(ps)
|
296 |
|