Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -334,93 +334,16 @@ ML_LANGUAGES = {
|
|
334 |
'🇨🇳 zh-CN': 'z',
|
335 |
}
|
336 |
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
'🇺🇸 🚺 Sarah ⭐': 'af_sarah',
|
343 |
-
'🇺🇸 🚺 Alloy': 'af_alloy',
|
344 |
-
'🇺🇸 🚺 Jessica 🧪': 'af_jessica',
|
345 |
-
'🇺🇸 🚺 Matilda 🧪': 'af_matilda',
|
346 |
-
'🇺🇸 🚺 Nova': 'af_nova',
|
347 |
-
'🇺🇸 🚺 River': 'af_river',
|
348 |
-
'🇺🇸 🚺 Sky': 'af_sky',
|
349 |
-
'🇺🇸 🚹 Adam ⭐': 'am_adam',
|
350 |
-
'🇺🇸 🚹 Michael ⭐': 'am_michael',
|
351 |
-
'🇺🇸 🚹 Echo': 'am_echo',
|
352 |
-
'🇺🇸 🚹 Eric': 'am_eric',
|
353 |
-
'🇺🇸 🚹 Liam': 'am_liam',
|
354 |
-
'🇺🇸 🚹 Onyx': 'am_onyx',
|
355 |
-
'🇺🇸 🚹 Will 🧪': 'am_will',
|
356 |
-
},
|
357 |
-
b={
|
358 |
-
'🇬🇧 🚺 Alice': 'bf_alice',
|
359 |
-
'🇬🇧 🚺 Lily': 'bf_lily',
|
360 |
-
'🇬🇧 🚹 Lewis ⭐': 'bm_lewis',
|
361 |
-
'🇬🇧 🚹 Daniel': 'bm_daniel',
|
362 |
-
'🇬🇧 🚹 Fable': 'bm_fable',
|
363 |
-
'🇬🇧 🚹 George': 'bm_george',
|
364 |
-
},
|
365 |
-
f={'🇫🇷 🚺 French Alpha': 'fr_alpha'},
|
366 |
-
j={
|
367 |
-
'🇯🇵 🚺 Japanese Alpha ⭐': 'jf_alpha',
|
368 |
-
'🇯🇵 🚺 Japanese Beta': 'jf_theta',
|
369 |
-
'🇯🇵 🚺 Japanese Gamma': 'jf_iota',
|
370 |
-
'🇯🇵 🚺 Japanese Delta': 'jf_kappa',
|
371 |
-
'🇯🇵 🚺 Japanese Epsilon': 'jf_beta_0',
|
372 |
-
'🇯🇵 🚺 Japanese Zeta': 'jf_gamma_0',
|
373 |
-
'🇯🇵 🚺 Japanese Eta': 'jf_delta_0',
|
374 |
-
'🇯🇵 🚺 Japanese Theta': 'jf_epsilon',
|
375 |
-
'🇯🇵 🚺 Japanese Iota': 'jf_zeta',
|
376 |
-
'🇯🇵 🚺 Japanese Kappa': 'jf_eta',
|
377 |
-
'🇯🇵 🚹 Japanese Omega': 'jm_omega',
|
378 |
-
},
|
379 |
-
k={
|
380 |
-
'🇰🇷 🚺 Korean Alpha': 'kf_alpha',
|
381 |
-
'🇰🇷 🚺 Korean Beta': 'kf_beta',
|
382 |
-
'🇰🇷 🚺 Korean Gamma': 'kf_gamma',
|
383 |
-
'🇰🇷 🚺 Korean Delta': 'kf_delta',
|
384 |
-
'🇰🇷 🚺 Korean Epsilon': 'kf_epsilon',
|
385 |
-
'🇰🇷 🚺 Korean Zeta': 'kf_zeta',
|
386 |
-
'🇰🇷 🚺 Korean Eta': 'kf_eta',
|
387 |
-
'🇰🇷 🚺 Korean Theta': 'kf_theta',
|
388 |
-
'🇰🇷 🚺 Korean Iota': 'kf_iota',
|
389 |
-
'🇰🇷 🚺 Korean Kappa': 'kf_kappa',
|
390 |
-
'🇰🇷 🚺 Korean Lambda': 'kf_lambda',
|
391 |
-
'🇰🇷 🚺 Korean Mu': 'kf_mu',
|
392 |
-
'🇰🇷 🚺 Korean Nu': 'kf_nu',
|
393 |
-
'🇰🇷 🚺 Korean Xi': 'kf_xi',
|
394 |
-
'🇰🇷 🚺 Korean Omicron': 'kf_omicron',
|
395 |
-
'🇰🇷 🚹 Korean Pi': 'km_pi',
|
396 |
-
'🇰🇷 🚹 Korean Rho': 'km_rho',
|
397 |
-
'🇰🇷 🚹 Korean Sigma': 'km_sigma',
|
398 |
-
'🇰🇷 🚹 Korean Tau': 'km_tau',
|
399 |
-
'🇰🇷 🚹 Korean Upsilon': 'km_upsilon',
|
400 |
-
'🇰🇷 🚹 Korean Phi': 'km_phi',
|
401 |
-
'🇰🇷 🚹 Korean Chi': 'km_chi',
|
402 |
-
'🇰🇷 🚹 Korean Psi': 'km_psi',
|
403 |
-
'🇰🇷 🚹 Korean Omega': 'km_omega',
|
404 |
-
},
|
405 |
-
z={
|
406 |
-
'🇨🇳 🚺 Mandarin Alpha': 'zf_beta',
|
407 |
-
'🇨🇳 🚺 Mandarin Beta': 'zf_gamma',
|
408 |
-
'🇨🇳 🚺 Mandarin Gamma': 'zf_delta',
|
409 |
-
'🇨🇳 🚺 Mandarin Delta': 'zf_epsilon',
|
410 |
-
'🇨🇳 🚺 Mandarin Epsilon 🧪': 'zf_alpha',
|
411 |
-
'🇨🇳 🚹 Mandarin Phi': 'zm_phi',
|
412 |
-
'🇨🇳 🚹 Mandarin Chi': 'zm_chi',
|
413 |
-
'🇨🇳 🚹 Mandarin Psi': 'zm_psi',
|
414 |
-
'🇨🇳 🚹 Mandarin Omega': 'zm_omega',
|
415 |
-
},
|
416 |
-
)
|
417 |
def change_language(value):
    """Build the voice dropdown that matches a newly selected language.

    Args:
        value: Language code used as a key into ML_CHOICES (for example
            'a' or 'b').

    Returns:
        A gr.Dropdown listing that language's voices, with the first
        listed voice preselected.
    """
    voice_options = list(ML_CHOICES[value].items())
    # British ('b') gets its own notice; every other language shows the legend.
    if value == 'b':
        hint = 'Missing British voices will be restored later'
    else:
        hint = '⭐ voices are stable, 🧪 are unstable'
    first_voice_id = voice_options[0][1]
    return gr.Dropdown(voice_options, value=first_voice_id, label='Voice', info=hint)
|
421 |
|
422 |
-
from gradio_client import Client
|
423 |
-
client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
|
424 |
def multilingual(text, voice, speed, trim, sk):
|
425 |
if not text.strip():
|
426 |
return None
|
@@ -431,7 +354,7 @@ def multilingual(text, voice, speed, trim, sk):
|
|
431 |
gr.Warning('Input may have been truncated')
|
432 |
except Exception as e:
|
433 |
print('📡', datetime.now(), text, voice, repr(e))
|
434 |
-
gr.Warning('v0.
|
435 |
gr.Info('Switching to v0.19')
|
436 |
audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
|
437 |
return audio
|
@@ -442,7 +365,7 @@ with gr.Blocks() as ml_tts:
|
|
442 |
with gr.Row():
|
443 |
with gr.Column():
|
444 |
text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
|
445 |
-
voice = gr.Dropdown(list(ML_CHOICES['a'].items()), value=
|
446 |
lang.change(fn=change_language, inputs=[lang], outputs=[voice])
|
447 |
with gr.Row():
|
448 |
random_btn = gr.Button('Random Text', variant='secondary')
|
@@ -457,11 +380,11 @@ with gr.Blocks() as ml_tts:
|
|
457 |
trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
|
458 |
with gr.Row():
|
459 |
gr.Markdown('''
|
460 |
-
🎉 New! Kokoro v0.
|
461 |
|
462 |
📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
|
463 |
|
464 |
-
⚠️
|
465 |
|
466 |
🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
|
467 |
''', container=True)
|
@@ -471,76 +394,6 @@ with gr.Blocks() as ml_tts:
|
|
471 |
text.submit(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
472 |
generate_btn.click(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
473 |
|
474 |
-
client_x = Client('hexgrad/kokoro-src-x', hf_token=os.environ['SRC'])
|
475 |
-
def preview(text, voice, speed, trim, sk):
    """Generate speech with the remote v0.22x preview backend, falling back to v0.19.

    Args:
        text: Input text. Blank or whitespace-only text returns None at once.
        voice: Voice identifier forwarded to the backend.
        speed: Speed value forwarded to the backend.
        trim: Trim value forwarded to the backend.
        sk: Session key; must equal os.environ['SK'].

    Returns:
        None for blank text. Otherwise the audio from the remote '/generate'
        endpoint, or the first element of the local generate() result when
        the remote call fails.

    Raises:
        AssertionError: If sk does not match os.environ['SK'].
    """
    if not text.strip():
        return None
    # Raised explicitly instead of via `assert`: assert statements are removed
    # under `python -O`, which would silently disable this key check. The
    # exception type and payload are the same as the assert produced.
    if sk != os.environ['SK']:
        raise AssertionError(('❌', datetime.now(), text, voice, sk))
    try:
        audio, out_ps = client_x.predict(text=text, voice=voice, speed=speed, trim=trim, use_gpu=True, sk=sk, api_name='/generate')
        # NOTE(review): 510 is presumably the backend's maximum output length - confirm.
        if len(out_ps) == 510:
            gr.Warning('Input may have been truncated')
    except Exception as e:
        # Deliberate best effort: any remote failure is logged and answered
        # with the local generate() path instead of surfacing an error.
        print('📡', datetime.now(), text, voice, repr(e))
        gr.Warning('v0.22x temporarily unavailable')
        gr.Info('Switching to v0.19')
        audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
    return audio
|
489 |
-
|
490 |
-
def vote(btn):
    """Log a feedback vote and show a thank-you toast.

    Args:
        btn: Value supplied by the clicked vote button (each vote button is
            registered as its own input).
    """
    # The vote is only printed here; this function stores it nowhere else.
    print(btn)
    gr.Info('Thanks for the feedback!')
|
493 |
-
|
494 |
-
PREVIEW_CHOICES = {
|
495 |
-
'🇺🇸 🚺 Heart ❤️': 'af_heart',
|
496 |
-
'🇺🇸 🚺 Spirit 🦋': 'af_spirit',
|
497 |
-
'🇬🇧 🚺 Soul 🪽': 'bf_soul',
|
498 |
-
}
|
499 |
-
|
500 |
-
with gr.Blocks() as preview_tts:
|
501 |
-
with gr.Row():
|
502 |
-
gr.Markdown('''
|
503 |
-
🧪 Experimental: v0.22x previews a potential change to the default English voice. 🧪
|
504 |
-
|
505 |
-
☝️ Check out v0.19 and multilingual v0.22 for a lot more voices, languages, and features!
|
506 |
-
|
507 |
-
📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
|
508 |
-
''', container=True)
|
509 |
-
with gr.Row():
|
510 |
-
with gr.Column():
|
511 |
-
text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
|
512 |
-
voice = gr.Dropdown(list(PREVIEW_CHOICES.items()), value='af_heart', label='Voice', info='🧪 These voices are experimental')
|
513 |
-
with gr.Row():
|
514 |
-
random_btn = gr.Button('Random Text', variant='secondary')
|
515 |
-
generate_btn = gr.Button('Generate', variant='primary')
|
516 |
-
random_btn.click(get_random_text, inputs=[voice], outputs=[text])
|
517 |
-
with gr.Column():
|
518 |
-
audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
|
519 |
-
with gr.Accordion('Audio Settings', open=False):
|
520 |
-
autoplay = gr.Checkbox(value=True, label='Autoplay')
|
521 |
-
autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
|
522 |
-
speed = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='⚡️ Speed', info='Adjust the speaking speed')
|
523 |
-
trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
|
524 |
-
with gr.Row():
|
525 |
-
with gr.Accordion('Feedback', open=True):
|
526 |
-
with gr.Row():
|
527 |
-
gr.Markdown('Vote for the voice you like the best among 3 challengers and 1 defender.')
|
528 |
-
with gr.Row():
|
529 |
-
heart_btn = gr.Button('🇺🇸 🚺 Heart ❤️', variant='secondary')
|
530 |
-
heart_btn.click(vote, inputs=[heart_btn])
|
531 |
-
soul_btn = gr.Button('🇺🇸 🚺 Spirit 🦋', variant='secondary')
|
532 |
-
soul_btn.click(vote, inputs=[soul_btn])
|
533 |
-
with gr.Row():
|
534 |
-
spirit_btn = gr.Button('🇬🇧 🚺 Soul 🪽', variant='secondary')
|
535 |
-
spirit_btn.click(vote, inputs=[spirit_btn])
|
536 |
-
old_btn = gr.Button('🇺🇸 🚺 American Female ⭐', variant='secondary')
|
537 |
-
old_btn.click(vote, inputs=[old_btn])
|
538 |
-
with gr.Row():
|
539 |
-
sk = gr.Textbox(visible=False)
|
540 |
-
text.change(lambda: os.environ['SK'], outputs=[sk])
|
541 |
-
text.submit(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
542 |
-
generate_btn.click(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
543 |
-
|
544 |
USE_GPU_CHOICES = [('Auto 🔀', 'auto'), ('CPU 💬', False), ('ZeroGPU 📄', True)]
|
545 |
USE_GPU_INFOS = {
|
546 |
'auto': 'Use CPU or GPU, whichever is faster',
|
@@ -831,6 +684,10 @@ This Space and the underlying Kokoro model are both under development and subjec
|
|
831 |
'''
|
832 |
with gr.Blocks() as changelog:
|
833 |
gr.Markdown('''
|
|
|
|
|
|
|
|
|
834 |
**8 Dec 2024**<br/>
|
835 |
🚀 Multilingual v0.22<br/>
|
836 |
🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
|
@@ -900,8 +757,8 @@ These datasets were **NOT** used to train Kokoro. They may be of interest to aca
|
|
900 |
|
901 |
with gr.Blocks() as app:
|
902 |
gr.TabbedInterface(
|
903 |
-
[
|
904 |
-
['🔥
|
905 |
)
|
906 |
|
907 |
if __name__ == '__main__':
|
|
|
334 |
'🇨🇳 zh-CN': 'z',
|
335 |
}
|
336 |
|
337 |
+
import json
from gradio_client import Client

# Remote backend client; the token is read from the SRC environment variable.
client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])

# Voice catalogue fetched from the backend when this module is loaded.
# NOTE(review): presumably {language code: {display label: voice id}} - confirm against the backend.
ML_CHOICES = json.loads(client.predict(api_name='/list_voices'))

# The first voice listed under language 'a' is the initial dropdown selection.
DEFAULT_VOICE = list(ML_CHOICES['a'].values())[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
def change_language(value):
    """Build the voice dropdown that matches a newly selected language.

    Args:
        value: Language code used as a key into ML_CHOICES (for example
            'a' or 'b').

    Returns:
        A gr.Dropdown listing that language's voices, with the first
        listed voice preselected.
    """
    voice_options = list(ML_CHOICES[value].items())
    # British ('b') gets its own notice; every other language shows the legend.
    if value == 'b':
        hint = 'Missing British voices will be restored later'
    else:
        hint = '⭐ voices are stable, 🧪 are unstable'
    first_voice_id = voice_options[0][1]
    return gr.Dropdown(voice_options, value=first_voice_id, label='Voice', info=hint)
|
346 |
|
|
|
|
|
347 |
def multilingual(text, voice, speed, trim, sk):
|
348 |
if not text.strip():
|
349 |
return None
|
|
|
354 |
gr.Warning('Input may have been truncated')
|
355 |
except Exception as e:
|
356 |
print('📡', datetime.now(), text, voice, repr(e))
|
357 |
+
gr.Warning('v0.23 temporarily unavailable')
|
358 |
gr.Info('Switching to v0.19')
|
359 |
audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
|
360 |
return audio
|
|
|
365 |
with gr.Row():
|
366 |
with gr.Column():
|
367 |
text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
|
368 |
+
voice = gr.Dropdown(list(ML_CHOICES['a'].items()), value=DEFAULT_VOICE, label='Voice', info='⭐ voices are stable, 🧪 are unstable')
|
369 |
lang.change(fn=change_language, inputs=[lang], outputs=[voice])
|
370 |
with gr.Row():
|
371 |
random_btn = gr.Button('Random Text', variant='secondary')
|
|
|
380 |
trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
|
381 |
with gr.Row():
|
382 |
gr.Markdown('''
|
383 |
+
🎉 New! Kokoro v0.23 now supports 5 languages, including new default voices. 🎉
|
384 |
|
385 |
📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
|
386 |
|
387 |
+
⚠️ v0.23 does not yet support custom pronunciation, Long Form, or Voice Mixer. You can still use these features in v0.19.
|
388 |
|
389 |
🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
|
390 |
''', container=True)
|
|
|
394 |
text.submit(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
395 |
generate_btn.click(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
|
396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
USE_GPU_CHOICES = [('Auto 🔀', 'auto'), ('CPU 💬', False), ('ZeroGPU 📄', True)]
|
398 |
USE_GPU_INFOS = {
|
399 |
'auto': 'Use CPU or GPU, whichever is faster',
|
|
|
684 |
'''
|
685 |
with gr.Blocks() as changelog:
|
686 |
gr.Markdown('''
|
687 |
+
**11 Dec 2024**<br/>
|
688 |
+
🚀 Multilingual v0.23<br/>
|
689 |
+
🗣️ 85 total voices
|
690 |
+
|
691 |
**8 Dec 2024**<br/>
|
692 |
🚀 Multilingual v0.22<br/>
|
693 |
🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
|
|
|
757 |
|
758 |
# Top-level application: one tab per sub-interface, with labels paired by position.
with gr.Blocks() as app:
    gr.TabbedInterface(
        interface_list=[ml_tts, basic_tts, lf_tts, about, data_card, changelog],
        tab_names=['🔥 Latest v0.23', '🗣️ TTS v0.19', '📖 Long Form v0.19', 'ℹ️ About', '📁 Data', '📝 Changelog'],
    )
|
763 |
|
764 |
if __name__ == '__main__':
|