hexgrad committed on
Commit
7a398c4
·
verified ·
1 Parent(s): b9e1a6e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -12
app.py CHANGED
@@ -325,7 +325,7 @@ def generate(text, voice='af', ps=None, speed=1, trim=0.5, use_gpu='auto', sk=No
325
  def toggle_autoplay(autoplay):
326
  return gr.Audio(interactive=False, label='Output Audio', autoplay=autoplay)
327
 
328
- PREVIEW_LANGUAGES = {
329
  '🇺🇸 en-US': 'a',
330
  '🇬🇧 en-GB': 'b',
331
  '🇫🇷 fr-FR': 'f',
@@ -334,7 +334,7 @@ PREVIEW_LANGUAGES = {
334
  '🇨🇳 zh-CN': 'z',
335
  }
336
 
337
- PREVIEW_CHOICES = dict(
338
  a={
339
  '🇺🇸 🚺 American Female ⭐': 'af',
340
  '🇺🇸 🚺 Bella ⭐': 'af_bella',
@@ -415,13 +415,13 @@ z={
415
  },
416
  )
417
  def change_language(value):
418
- choices = list(PREVIEW_CHOICES[value].items())
419
  info = 'Missing British voices will be restored later' if value == 'b' else '⭐ voices are stable, 🧪 are unstable'
420
  return gr.Dropdown(choices, value=choices[0][1], label='Voice', info=info)
421
 
422
  from gradio_client import Client
423
  client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
424
- def preview(text, voice, speed, trim, sk):
425
  if not text.strip():
426
  return None
427
  assert sk == os.environ['SK'], ('❌', datetime.now(), text, voice, sk)
@@ -436,13 +436,13 @@ def preview(text, voice, speed, trim, sk):
436
  audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
437
  return audio
438
 
439
- with gr.Blocks() as preview_tts:
440
  with gr.Row():
441
- lang = gr.Radio(choices=PREVIEW_LANGUAGES.items(), value='a', label='Language', show_label=False)
442
  with gr.Row():
443
  with gr.Column():
444
  text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
445
- voice = gr.Dropdown(list(PREVIEW_CHOICES['a'].items()), value='af', label='Voice', info='⭐ voices are stable, 🧪 are unstable')
446
  lang.change(fn=change_language, inputs=[lang], outputs=[voice])
447
  with gr.Row():
448
  random_btn = gr.Button('Random Text', variant='secondary')
@@ -459,15 +459,71 @@ with gr.Blocks() as preview_tts:
459
  gr.Markdown('''
460
  🎉 New! Kokoro v0.22 now supports 5 languages. 🎉
461
 
462
- 📡 Telemetry: For debugging purposes, the text you enter may be printed to temporary logs, which are periodically wiped.
463
 
464
- ⚠️ Preview v0.22 does not yet support custom pronunciation, Long Form, or Voice Mixer. You can still use these features for v0.19.
465
 
466
  🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
467
  ''', container=True)
468
  with gr.Row():
469
  sk = gr.Textbox(visible=False)
470
  text.change(lambda: os.environ['SK'], outputs=[sk])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  text.submit(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
472
  generate_btn.click(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
473
 
@@ -762,7 +818,7 @@ This Space and the underlying Kokoro model are both under development and subjec
762
  with gr.Blocks() as changelog:
763
  gr.Markdown('''
764
  **8 Dec 2024**<br/>
765
- 🚀 Model Preview v0.22<br/>
766
  🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
767
  🗣️ 68 total voices<br/>
768
  📁 Added data card and telemetry notice
@@ -830,8 +886,8 @@ These datasets were **NOT** used to train Kokoro. They may be of interest to aca
830
 
831
  with gr.Blocks() as app:
832
  gr.TabbedInterface(
833
- [preview_tts, basic_tts, lf_tts, about, data_card, changelog],
834
- ['🔥 Preview v0.22', '🗣️ TTS v0.19', '📖 Long Form v0.19', 'ℹ️ About', '📁 Data', '📝 Changelog'],
835
  )
836
 
837
  if __name__ == '__main__':
 
325
  def toggle_autoplay(autoplay):
326
  return gr.Audio(interactive=False, label='Output Audio', autoplay=autoplay)
327
 
328
+ ML_LANGUAGES = {
329
  '🇺🇸 en-US': 'a',
330
  '🇬🇧 en-GB': 'b',
331
  '🇫🇷 fr-FR': 'f',
 
334
  '🇨🇳 zh-CN': 'z',
335
  }
336
 
337
+ ML_CHOICES = dict(
338
  a={
339
  '🇺🇸 🚺 American Female ⭐': 'af',
340
  '🇺🇸 🚺 Bella ⭐': 'af_bella',
 
415
  },
416
  )
417
  def change_language(value):
418
+ choices = list(ML_CHOICES[value].items())
419
  info = 'Missing British voices will be restored later' if value == 'b' else '⭐ voices are stable, 🧪 are unstable'
420
  return gr.Dropdown(choices, value=choices[0][1], label='Voice', info=info)
421
 
422
  from gradio_client import Client
423
  client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
424
+ def multilingual(text, voice, speed, trim, sk):
425
  if not text.strip():
426
  return None
427
  assert sk == os.environ['SK'], ('❌', datetime.now(), text, voice, sk)
 
436
  audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
437
  return audio
438
 
439
+ with gr.Blocks() as ml_tts:
440
  with gr.Row():
441
+ lang = gr.Radio(choices=ML_LANGUAGES.items(), value='a', label='Language', show_label=False)
442
  with gr.Row():
443
  with gr.Column():
444
  text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
445
+ voice = gr.Dropdown(list(ML_CHOICES['a'].items()), value='af', label='Voice', info='⭐ voices are stable, 🧪 are unstable')
446
  lang.change(fn=change_language, inputs=[lang], outputs=[voice])
447
  with gr.Row():
448
  random_btn = gr.Button('Random Text', variant='secondary')
 
459
  gr.Markdown('''
460
  🎉 New! Kokoro v0.22 now supports 5 languages. 🎉
461
 
462
+ 📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
463
 
464
+ ⚠️ Multilingual v0.22 does not yet support custom pronunciation, Long Form, or Voice Mixer. You can still use these features for v0.19.
465
 
466
  🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
467
  ''', container=True)
468
  with gr.Row():
469
  sk = gr.Textbox(visible=False)
470
  text.change(lambda: os.environ['SK'], outputs=[sk])
471
+ text.submit(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
472
+ generate_btn.click(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
473
+
474
# Dedicated client for the experimental v0.22x backend. It deliberately gets its
# own name instead of rebinding the module-level `client` created earlier for
# 'hexgrad/kokoro-src': functions look globals up at call time, so reusing the
# name would silently redirect every earlier function that reads `client`.
# NOTE(review): the body of the earlier `multilingual` function is not fully
# visible in this view — confirm it reads `client`.
preview_client = Client('hexgrad/kokoro-src-x', hf_token=os.environ['SRC'])


def preview(text, voice, speed, trim, sk):
    """Generate audio via the remote v0.22x endpoint, falling back to `generate`.

    Args:
        text: Input text; blank or whitespace-only text short-circuits to None.
        voice, speed, trim: Forwarded unchanged to the remote '/generate'
            endpoint and, on failure, to the local `generate` function.
        sk: Must equal os.environ['SK'].

    Returns:
        None for blank text; otherwise the audio returned by the remote call,
        or the first element of `generate(...)` if the remote call raised.

    Raises:
        AssertionError: If `sk` does not match os.environ['SK'].
    """
    if not text.strip():
        return None
    # NOTE(review): `assert` is stripped when Python runs with -O, which would
    # disable this check; kept as-is to match the other handlers in this file.
    assert sk == os.environ['SK'], ('❌', datetime.now(), text, voice, sk)
    try:
        audio, out_ps = preview_client.predict(text=text, voice=voice, speed=speed, trim=trim, use_gpu=True, sk=sk, api_name='/generate')
        if len(out_ps) == 510:
            gr.Warning('Input may have been truncated')
    except Exception as e:
        # Best-effort fallback: log the failure, tell the user, and use the
        # local generator instead of surfacing the remote error.
        print('📡', datetime.now(), text, voice, repr(e))
        gr.Warning('v0.22x temporarily unavailable')
        gr.Info('Switching to v0.19')
        audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
    return audio
489
+
490
def vote(btn):
    """Log a feedback vote and thank the user.

    `btn` is printed to stdout unchanged, then a Gradio info toast is shown.
    """
    print(btn)
    gr.Info('Thanks for the feedback!')
493
+
494
# Preview tab for the single-voice experimental v0.22x model.
# NOTE(review): the nesting below is reconstructed from a flattened diff view
# in which indentation was lost — confirm the layout against the real file.
with gr.Blocks() as preview_tts:
    with gr.Row():
        gr.Markdown('''
🧪 Experimental: v0.22x is a single speaker test voice to determine if the default English voice should be changed. 🧪

☝️ Check out v0.19 and multilingual v0.22 for a lot more voices, languages, and features!

📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
''', container=True)
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
            # Only one voice is offered, so the dropdown is shown read-only.
            voice = gr.Dropdown([('🇺🇸 🚺 AF Experimental 🧪', 'afx')], value='afx', label='Voice', info='⭐ voices are stable, 🧪 are unstable', interactive=False)
            with gr.Row():
                random_btn = gr.Button('Random Text', variant='secondary')
                generate_btn = gr.Button('Generate', variant='primary')
            random_btn.click(get_random_text, inputs=[voice], outputs=[text])
        with gr.Column():
            audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
            with gr.Accordion('Audio Settings', open=False):
                autoplay = gr.Checkbox(value=True, label='Autoplay')
                autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
                speed = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='⚡️ Speed', info='Adjust the speaking speed')
                trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
    with gr.Row():
        with gr.Accordion('Feedback', open=True):
            # The click handler must be the `vote` function; the button is
            # passed as the input so `vote` receives its label text.
            # Previously the button object itself was passed as the handler.
            new_btn = gr.Button('I prefer the new, Experimental 🧪 voice', variant='secondary')
            new_btn.click(vote, inputs=[new_btn])
            old_btn = gr.Button('I prefer the old, American Female ⭐ voice', variant='secondary')
            old_btn.click(vote, inputs=[old_btn])
    with gr.Row():
        # Hidden field filled from the environment whenever the text changes;
        # `preview` compares it against os.environ['SK'].
        sk = gr.Textbox(visible=False)
    text.change(lambda: os.environ['SK'], outputs=[sk])
    text.submit(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
    generate_btn.click(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
529
 
 
818
  with gr.Blocks() as changelog:
819
  gr.Markdown('''
820
  **8 Dec 2024**<br/>
821
+ 🚀 Multilingual v0.22<br/>
822
  🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
823
  🗣️ 68 total voices<br/>
824
  📁 Added data card and telemetry notice
 
886
 
887
  with gr.Blocks() as app:
888
  gr.TabbedInterface(
889
+ [preview_tts, ml_tts, basic_tts, lf_tts, about, data_card, changelog],
890
+ ['🔥 Preview v0.22x', '🌐 Multilingual v0.22', '🗣️ TTS v0.19', '📖 Long Form v0.19', 'ℹ️ About', '📁 Data', '📝 Changelog'],
891
  )
892
 
893
  if __name__ == '__main__':