Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -325,7 +325,7 @@ def generate(text, voice='af', ps=None, speed=1, trim=0.5, use_gpu='auto', sk=No
|
|
325 |
def toggle_autoplay(autoplay):
    """Build a replacement output-audio component with the requested autoplay flag.

    Args:
        autoplay: Value forwarded unchanged as the ``autoplay`` argument of
            ``gr.Audio``.

    Returns:
        A non-interactive ``gr.Audio`` labelled 'Output Audio'.
    """
    audio_kwargs = dict(interactive=False, label='Output Audio', autoplay=autoplay)
    return gr.Audio(**audio_kwargs)
|
327 |
|
328 |
-
|
329 |
'🇺🇸 en-US': 'a',
|
330 |
'🇬🇧 en-GB': 'b',
|
331 |
'🇫🇷 fr-FR': 'f',
|
@@ -334,7 +334,7 @@ PREVIEW_LANGUAGES = {
|
|
334 |
'🇨🇳 zh-CN': 'z',
|
335 |
}
|
336 |
|
337 |
-
|
338 |
a={
|
339 |
'🇺🇸 🚺 American Female ⭐': 'af',
|
340 |
'🇺🇸 🚺 Bella ⭐': 'af_bella',
|
@@ -415,13 +415,13 @@ z={
|
|
415 |
},
|
416 |
)
|
417 |
def change_language(value):
|
418 |
-
choices = list(
|
419 |
info = 'Missing British voices will be restored later' if value == 'b' else '⭐ voices are stable, 🧪 are unstable'
|
420 |
return gr.Dropdown(choices, value=choices[0][1], label='Voice', info=info)
|
421 |
|
422 |
from gradio_client import Client
|
423 |
client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
|
424 |
-
def
|
425 |
if not text.strip():
|
426 |
return None
|
427 |
assert sk == os.environ['SK'], ('❌', datetime.now(), text, voice, sk)
|
@@ -436,13 +436,13 @@ def preview(text, voice, speed, trim, sk):
|
|
436 |
audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
|
437 |
return audio
|
438 |
|
439 |
-
with gr.Blocks() as
|
440 |
with gr.Row():
|
441 |
-
lang = gr.Radio(choices=
|
442 |
with gr.Row():
|
443 |
with gr.Column():
|
444 |
text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
|
445 |
-
voice = gr.Dropdown(list(
|
446 |
lang.change(fn=change_language, inputs=[lang], outputs=[voice])
|
447 |
with gr.Row():
|
448 |
random_btn = gr.Button('Random Text', variant='secondary')
|
@@ -459,15 +459,71 @@ with gr.Blocks() as preview_tts:
|
|
459 |
gr.Markdown('''
|
460 |
🎉 New! Kokoro v0.22 now supports 5 languages. 🎉
|
461 |
|
462 |
-
📡 Telemetry: For debugging purposes, the text you enter may be printed to temporary logs, which are periodically wiped.
|
463 |
|
464 |
-
⚠️
|
465 |
|
466 |
🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
|
467 |
''', container=True)
|
468 |
with gr.Row():
|
469 |
sk = gr.Textbox(visible=False)
|
470 |
text.change(lambda: os.environ['SK'], outputs=[sk])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
text.submit(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
472 |
generate_btn.click(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
473 |
|
@@ -762,7 +818,7 @@ This Space and the underlying Kokoro model are both under development and subjec
|
|
762 |
with gr.Blocks() as changelog:
|
763 |
gr.Markdown('''
|
764 |
**8 Dec 2024**<br/>
|
765 |
-
🚀
|
766 |
🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
|
767 |
🗣️ 68 total voices<br/>
|
768 |
📁 Added data card and telemetry notice
|
@@ -830,8 +886,8 @@ These datasets were **NOT** used to train Kokoro. They may be of interest to aca
|
|
830 |
|
831 |
with gr.Blocks() as app:
|
832 |
gr.TabbedInterface(
|
833 |
-
[preview_tts, basic_tts, lf_tts, about, data_card, changelog],
|
834 |
-
['🔥 Preview v0.22', '🗣️ TTS v0.19', '📖 Long Form v0.19', 'ℹ️ About', '📁 Data', '📝 Changelog'],
|
835 |
)
|
836 |
|
837 |
if __name__ == '__main__':
|
|
|
325 |
def toggle_autoplay(autoplay):
    """Build a replacement output-audio component with the requested autoplay flag.

    Args:
        autoplay: Value forwarded unchanged as the ``autoplay`` argument of
            ``gr.Audio``.

    Returns:
        A non-interactive ``gr.Audio`` labelled 'Output Audio'.
    """
    audio_kwargs = dict(interactive=False, label='Output Audio', autoplay=autoplay)
    return gr.Audio(**audio_kwargs)
|
327 |
|
328 |
+
ML_LANGUAGES = {
|
329 |
'🇺🇸 en-US': 'a',
|
330 |
'🇬🇧 en-GB': 'b',
|
331 |
'🇫🇷 fr-FR': 'f',
|
|
|
334 |
'🇨🇳 zh-CN': 'z',
|
335 |
}
|
336 |
|
337 |
+
ML_CHOICES = dict(
|
338 |
a={
|
339 |
'🇺🇸 🚺 American Female ⭐': 'af',
|
340 |
'🇺🇸 🚺 Bella ⭐': 'af_bella',
|
|
|
415 |
},
|
416 |
)
|
417 |
def change_language(value):
    """Rebuild the voice dropdown for the selected language code.

    Args:
        value: Key into ``ML_CHOICES`` (e.g. 'a', 'b'); an unknown key raises
            ``KeyError``.

    Returns:
        A ``gr.Dropdown`` listing that language's (label, voice id) pairs,
        preselecting the first voice.
    """
    voice_choices = [*ML_CHOICES[value].items()]
    if value == 'b':
        info = 'Missing British voices will be restored later'
    else:
        info = '⭐ voices are stable, 🧪 are unstable'
    # Each entry is a (label, voice id) pair; the default is the first voice id.
    _, default_voice = voice_choices[0]
    return gr.Dropdown(voice_choices, value=default_voice, label='Voice', info=info)
|
421 |
|
422 |
from gradio_client import Client
|
423 |
client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
|
424 |
+
def multilingual(text, voice, speed, trim, sk):
|
425 |
if not text.strip():
|
426 |
return None
|
427 |
assert sk == os.environ['SK'], ('❌', datetime.now(), text, voice, sk)
|
|
|
436 |
audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
|
437 |
return audio
|
438 |
|
439 |
+
with gr.Blocks() as ml_tts:
|
440 |
with gr.Row():
|
441 |
+
lang = gr.Radio(choices=ML_LANGUAGES.items(), value='a', label='Language', show_label=False)
|
442 |
with gr.Row():
|
443 |
with gr.Column():
|
444 |
text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
|
445 |
+
voice = gr.Dropdown(list(ML_CHOICES['a'].items()), value='af', label='Voice', info='⭐ voices are stable, 🧪 are unstable')
|
446 |
lang.change(fn=change_language, inputs=[lang], outputs=[voice])
|
447 |
with gr.Row():
|
448 |
random_btn = gr.Button('Random Text', variant='secondary')
|
|
|
459 |
gr.Markdown('''
|
460 |
🎉 New! Kokoro v0.22 now supports 5 languages. 🎉
|
461 |
|
462 |
+
📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
|
463 |
|
464 |
+
⚠️ Multilingual v0.22 does not yet support custom pronunciation, Long Form, or Voice Mixer. You can still use these features for v0.19.
|
465 |
|
466 |
🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
|
467 |
''', container=True)
|
468 |
with gr.Row():
|
469 |
sk = gr.Textbox(visible=False)
|
470 |
text.change(lambda: os.environ['SK'], outputs=[sk])
|
471 |
+
text.submit(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
472 |
+
generate_btn.click(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
473 |
+
|
474 |
+
# Dedicated client for the experimental v0.22x backend. It deliberately gets its
# own name: an earlier module-level `client` is bound to 'hexgrad/kokoro-src',
# and rebinding that global here would redirect every function that looks up
# `client` at call time to this backend instead.
# NOTE(review): `multilingual` presumably reads the global `client` — confirm.
preview_client = Client('hexgrad/kokoro-src-x', hf_token=os.environ['SRC'])


def preview(text, voice, speed, trim, sk):
    """Generate audio with the experimental v0.22x backend, falling back to `generate`.

    Args:
        text: Input text; blank or whitespace-only input returns ``None``.
        voice: Voice id forwarded to the backend.
        speed: Speed setting forwarded to the backend.
        trim: Trim setting forwarded to the backend.
        sk: Session key; must equal ``os.environ['SK']``.

    Returns:
        The audio returned by the remote '/generate' endpoint, or the first
        element of ``generate(...)`` when the remote call fails.

    Raises:
        AssertionError: If ``sk`` does not match ``os.environ['SK']``.
    """
    if not text.strip():
        return None
    # Explicit raise instead of `assert`: assert statements are stripped under
    # `python -O`, which would silently disable this check. The exception type
    # and payload are the same as the original assert produced.
    if sk != os.environ['SK']:
        raise AssertionError(('❌', datetime.now(), text, voice, sk))
    try:
        audio, out_ps = preview_client.predict(
            text=text, voice=voice, speed=speed, trim=trim,
            use_gpu=True, sk=sk, api_name='/generate',
        )
        # NOTE(review): 510 looks like the backend's maximum output length — confirm.
        if len(out_ps) == 510:
            gr.Warning('Input may have been truncated')
    except Exception as e:
        # Best-effort boundary: log the failure, tell the user, and fall back
        # to the local generator so the request still yields audio.
        print('📡', datetime.now(), text, voice, repr(e))
        gr.Warning('v0.22x temporarily unavailable')
        gr.Info('Switching to v0.19')
        audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
    return audio
|
489 |
+
|
490 |
+
def vote(btn):
    """Record a feedback vote and thank the user.

    Args:
        btn: Whatever the event passes in; it is printed as-is so the vote
            shows up in the logs.
    """
    # The printed line is the only record of the vote.
    print(btn)
    gr.Info('Thanks for the feedback!')
|
493 |
+
|
494 |
+
# Preview tab: single experimental voice (v0.22x) plus a feedback section.
with gr.Blocks() as preview_tts:
    with gr.Row():
        gr.Markdown('''
🧪 Experimental: v0.22x is a single speaker test voice to determine if the default English voice should be changed. 🧪

☝️ Check out v0.19 and multilingual v0.22 for a lot more voices, languages, and features!

📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
''', container=True)
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
            # Only one voice is offered here, so the dropdown is read-only.
            voice = gr.Dropdown([('🇺🇸 🚺 AF Experimental 🧪', 'afx')], value='afx', label='Voice', info='⭐ voices are stable, 🧪 are unstable', interactive=False)
            with gr.Row():
                random_btn = gr.Button('Random Text', variant='secondary')
                generate_btn = gr.Button('Generate', variant='primary')
            random_btn.click(get_random_text, inputs=[voice], outputs=[text])
        with gr.Column():
            audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
            with gr.Accordion('Audio Settings', open=False):
                autoplay = gr.Checkbox(value=True, label='Autoplay')
                autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
                speed = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='⚡️ Speed', info='Adjust the speaking speed')
                trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
    with gr.Row():
        with gr.Accordion('Feedback', open=True):
            # The handler must be the `vote` function, not the button itself.
            # Passing the button as the input sends its label to `vote`, so
            # the printed line shows which option was chosen.
            new_btn = gr.Button('I prefer the new, Experimental 🧪 voice', variant='secondary')
            new_btn.click(vote, inputs=[new_btn])
            old_btn = gr.Button('I prefer the old, American Female ⭐ voice', variant='secondary')
            old_btn.click(vote, inputs=[old_btn])
    with gr.Row():
        # Hidden field filled with the session key whenever the text changes;
        # `preview` checks it against os.environ['SK'].
        sk = gr.Textbox(visible=False)
    text.change(lambda: os.environ['SK'], outputs=[sk])
    text.submit(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
    generate_btn.click(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
529 |
|
|
|
818 |
with gr.Blocks() as changelog:
|
819 |
gr.Markdown('''
|
820 |
**8 Dec 2024**<br/>
|
821 |
+
🚀 Multilingual v0.22<br/>
|
822 |
🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
|
823 |
🗣️ 68 total voices<br/>
|
824 |
📁 Added data card and telemetry notice
|
|
|
886 |
|
887 |
# Top-level app: one tab per sub-interface, in display order.
with gr.Blocks() as app:
    # Keep each interface next to its tab title so the two lists cannot drift apart.
    _tabs = [
        (preview_tts, '🔥 Preview v0.22x'),
        (ml_tts, '🌐 Multilingual v0.22'),
        (basic_tts, '🗣️ TTS v0.19'),
        (lf_tts, '📖 Long Form v0.19'),
        (about, 'ℹ️ About'),
        (data_card, '📁 Data'),
        (changelog, '📝 Changelog'),
    ]
    gr.TabbedInterface(
        [interface for interface, _ in _tabs],
        [title for _, title in _tabs],
    )
|
892 |
|
893 |
if __name__ == '__main__':
|