hexgrad committed on
Commit
088e714
·
verified ·
1 Parent(s): c9fb475

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -158
app.py CHANGED
@@ -334,93 +334,16 @@ ML_LANGUAGES = {
334
  '🇨🇳 zh-CN': 'z',
335
  }
336
 
337
- ML_CHOICES = dict(
338
- a={
339
- '🇺🇸 🚺 American Female ⭐': 'af',
340
- '🇺🇸 🚺 Bella ⭐': 'af_bella',
341
- '🇺🇸 🚺 Nicole ⭐': 'af_nicole',
342
- '🇺🇸 🚺 Sarah ⭐': 'af_sarah',
343
- '🇺🇸 🚺 Alloy': 'af_alloy',
344
- '🇺🇸 🚺 Jessica 🧪': 'af_jessica',
345
- '🇺🇸 🚺 Matilda 🧪': 'af_matilda',
346
- '🇺🇸 🚺 Nova': 'af_nova',
347
- '🇺🇸 🚺 River': 'af_river',
348
- '🇺🇸 🚺 Sky': 'af_sky',
349
- '🇺🇸 🚹 Adam ⭐': 'am_adam',
350
- '🇺🇸 🚹 Michael ⭐': 'am_michael',
351
- '🇺🇸 🚹 Echo': 'am_echo',
352
- '🇺🇸 🚹 Eric': 'am_eric',
353
- '🇺🇸 🚹 Liam': 'am_liam',
354
- '🇺🇸 🚹 Onyx': 'am_onyx',
355
- '🇺🇸 🚹 Will 🧪': 'am_will',
356
- },
357
- b={
358
- '🇬🇧 🚺 Alice': 'bf_alice',
359
- '🇬🇧 🚺 Lily': 'bf_lily',
360
- '🇬🇧 🚹 Lewis ⭐': 'bm_lewis',
361
- '🇬🇧 🚹 Daniel': 'bm_daniel',
362
- '🇬🇧 🚹 Fable': 'bm_fable',
363
- '🇬🇧 🚹 George': 'bm_george',
364
- },
365
- f={'🇫🇷 🚺 French Alpha': 'fr_alpha'},
366
- j={
367
- '🇯🇵 🚺 Japanese Alpha ⭐': 'jf_alpha',
368
- '🇯🇵 🚺 Japanese Beta': 'jf_theta',
369
- '🇯🇵 🚺 Japanese Gamma': 'jf_iota',
370
- '🇯🇵 🚺 Japanese Delta': 'jf_kappa',
371
- '🇯🇵 🚺 Japanese Epsilon': 'jf_beta_0',
372
- '🇯🇵 🚺 Japanese Zeta': 'jf_gamma_0',
373
- '🇯🇵 🚺 Japanese Eta': 'jf_delta_0',
374
- '🇯🇵 🚺 Japanese Theta': 'jf_epsilon',
375
- '🇯🇵 🚺 Japanese Iota': 'jf_zeta',
376
- '🇯🇵 🚺 Japanese Kappa': 'jf_eta',
377
- '🇯🇵 🚹 Japanese Omega': 'jm_omega',
378
- },
379
- k={
380
- '🇰🇷 🚺 Korean Alpha': 'kf_alpha',
381
- '🇰🇷 🚺 Korean Beta': 'kf_beta',
382
- '🇰🇷 🚺 Korean Gamma': 'kf_gamma',
383
- '🇰🇷 🚺 Korean Delta': 'kf_delta',
384
- '🇰🇷 🚺 Korean Epsilon': 'kf_epsilon',
385
- '🇰🇷 🚺 Korean Zeta': 'kf_zeta',
386
- '🇰🇷 🚺 Korean Eta': 'kf_eta',
387
- '🇰🇷 🚺 Korean Theta': 'kf_theta',
388
- '🇰🇷 🚺 Korean Iota': 'kf_iota',
389
- '🇰🇷 🚺 Korean Kappa': 'kf_kappa',
390
- '🇰🇷 🚺 Korean Lambda': 'kf_lambda',
391
- '🇰🇷 🚺 Korean Mu': 'kf_mu',
392
- '🇰🇷 🚺 Korean Nu': 'kf_nu',
393
- '🇰🇷 🚺 Korean Xi': 'kf_xi',
394
- '🇰🇷 🚺 Korean Omicron': 'kf_omicron',
395
- '🇰🇷 🚹 Korean Pi': 'km_pi',
396
- '🇰🇷 🚹 Korean Rho': 'km_rho',
397
- '🇰🇷 🚹 Korean Sigma': 'km_sigma',
398
- '🇰🇷 🚹 Korean Tau': 'km_tau',
399
- '🇰🇷 🚹 Korean Upsilon': 'km_upsilon',
400
- '🇰🇷 🚹 Korean Phi': 'km_phi',
401
- '🇰🇷 🚹 Korean Chi': 'km_chi',
402
- '🇰🇷 🚹 Korean Psi': 'km_psi',
403
- '🇰🇷 🚹 Korean Omega': 'km_omega',
404
- },
405
- z={
406
- '🇨🇳 🚺 Mandarin Alpha': 'zf_beta',
407
- '🇨🇳 🚺 Mandarin Beta': 'zf_gamma',
408
- '🇨🇳 🚺 Mandarin Gamma': 'zf_delta',
409
- '🇨🇳 🚺 Mandarin Delta': 'zf_epsilon',
410
- '🇨🇳 🚺 Mandarin Epsilon 🧪': 'zf_alpha',
411
- '🇨🇳 🚹 Mandarin Phi': 'zm_phi',
412
- '🇨🇳 🚹 Mandarin Chi': 'zm_chi',
413
- '🇨🇳 🚹 Mandarin Psi': 'zm_psi',
414
- '🇨🇳 🚹 Mandarin Omega': 'zm_omega',
415
- },
416
- )
417
  def change_language(value):
418
  choices = list(ML_CHOICES[value].items())
419
  info = 'Missing British voices will be restored later' if value == 'b' else '⭐ voices are stable, 🧪 are unstable'
420
  return gr.Dropdown(choices, value=choices[0][1], label='Voice', info=info)
421
 
422
- from gradio_client import Client
423
- client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
424
  def multilingual(text, voice, speed, trim, sk):
425
  if not text.strip():
426
  return None
@@ -431,7 +354,7 @@ def multilingual(text, voice, speed, trim, sk):
431
  gr.Warning('Input may have been truncated')
432
  except Exception as e:
433
  print('📡', datetime.now(), text, voice, repr(e))
434
- gr.Warning('v0.22 temporarily unavailable')
435
  gr.Info('Switching to v0.19')
436
  audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
437
  return audio
@@ -442,7 +365,7 @@ with gr.Blocks() as ml_tts:
442
  with gr.Row():
443
  with gr.Column():
444
  text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
445
- voice = gr.Dropdown(list(ML_CHOICES['a'].items()), value='af', label='Voice', info='⭐ voices are stable, 🧪 are unstable')
446
  lang.change(fn=change_language, inputs=[lang], outputs=[voice])
447
  with gr.Row():
448
  random_btn = gr.Button('Random Text', variant='secondary')
@@ -457,11 +380,11 @@ with gr.Blocks() as ml_tts:
457
  trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
458
  with gr.Row():
459
  gr.Markdown('''
460
- 🎉 New! Kokoro v0.22 now supports 5 languages. 🎉
461
 
462
  📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
463
 
464
- ⚠️ Multilingual v0.22 does not yet support custom pronunciation, Long Form, or Voice Mixer. You can still use these features for v0.19.
465
 
466
  🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
467
  ''', container=True)
@@ -471,76 +394,6 @@ with gr.Blocks() as ml_tts:
471
  text.submit(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
472
  generate_btn.click(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
473
 
474
- client_x = Client('hexgrad/kokoro-src-x', hf_token=os.environ['SRC'])
475
- def preview(text, voice, speed, trim, sk):
476
- if not text.strip():
477
- return None
478
- assert sk == os.environ['SK'], ('❌', datetime.now(), text, voice, sk)
479
- try:
480
- audio, out_ps = client_x.predict(text=text, voice=voice, speed=speed, trim=trim, use_gpu=True, sk=sk, api_name='/generate')
481
- if len(out_ps) == 510:
482
- gr.Warning('Input may have been truncated')
483
- except Exception as e:
484
- print('📡', datetime.now(), text, voice, repr(e))
485
- gr.Warning('v0.22x temporarily unavailable')
486
- gr.Info('Switching to v0.19')
487
- audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
488
- return audio
489
-
490
- def vote(btn):
491
- print(btn)
492
- gr.Info('Thanks for the feedback!')
493
-
494
- PREVIEW_CHOICES = {
495
- '🇺🇸 🚺 Heart ❤️': 'af_heart',
496
- '🇺🇸 🚺 Spirit 🦋': 'af_spirit',
497
- '🇬🇧 🚺 Soul 🪽': 'bf_soul',
498
- }
499
-
500
- with gr.Blocks() as preview_tts:
501
- with gr.Row():
502
- gr.Markdown('''
503
- 🧪 Experimental: v0.22x previews a potential change to the default English voice. 🧪
504
-
505
- ☝️ Check out v0.19 and multilingual v0.22 for a lot more voices, languages, and features!
506
-
507
- 📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
508
- ''', container=True)
509
- with gr.Row():
510
- with gr.Column():
511
- text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
512
- voice = gr.Dropdown(list(PREVIEW_CHOICES.items()), value='af_heart', label='Voice', info='🧪 These voices are experimental')
513
- with gr.Row():
514
- random_btn = gr.Button('Random Text', variant='secondary')
515
- generate_btn = gr.Button('Generate', variant='primary')
516
- random_btn.click(get_random_text, inputs=[voice], outputs=[text])
517
- with gr.Column():
518
- audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
519
- with gr.Accordion('Audio Settings', open=False):
520
- autoplay = gr.Checkbox(value=True, label='Autoplay')
521
- autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
522
- speed = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='⚡️ Speed', info='Adjust the speaking speed')
523
- trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
524
- with gr.Row():
525
- with gr.Accordion('Feedback', open=True):
526
- with gr.Row():
527
- gr.Markdown('Vote for the voice you like the best among 3 challengers and 1 defender.')
528
- with gr.Row():
529
- heart_btn = gr.Button('🇺🇸 🚺 Heart ❤️', variant='secondary')
530
- heart_btn.click(vote, inputs=[heart_btn])
531
- soul_btn = gr.Button('🇺🇸 🚺 Spirit 🦋', variant='secondary')
532
- soul_btn.click(vote, inputs=[soul_btn])
533
- with gr.Row():
534
- spirit_btn = gr.Button('🇬🇧 🚺 Soul 🪽', variant='secondary')
535
- spirit_btn.click(vote, inputs=[spirit_btn])
536
- old_btn = gr.Button('🇺🇸 🚺 American Female ⭐', variant='secondary')
537
- old_btn.click(vote, inputs=[old_btn])
538
- with gr.Row():
539
- sk = gr.Textbox(visible=False)
540
- text.change(lambda: os.environ['SK'], outputs=[sk])
541
- text.submit(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
542
- generate_btn.click(preview, inputs=[text, voice, speed, trim, sk], outputs=[audio])
543
-
544
  USE_GPU_CHOICES = [('Auto 🔀', 'auto'), ('CPU 💬', False), ('ZeroGPU 📄', True)]
545
  USE_GPU_INFOS = {
546
  'auto': 'Use CPU or GPU, whichever is faster',
@@ -831,6 +684,10 @@ This Space and the underlying Kokoro model are both under development and subjec
831
  '''
832
  with gr.Blocks() as changelog:
833
  gr.Markdown('''
 
 
 
 
834
  **8 Dec 2024**<br/>
835
  🚀 Multilingual v0.22<br/>
836
  🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
@@ -900,8 +757,8 @@ These datasets were **NOT** used to train Kokoro. They may be of interest to aca
900
 
901
  with gr.Blocks() as app:
902
  gr.TabbedInterface(
903
- [preview_tts, ml_tts, basic_tts, lf_tts, about, data_card, changelog],
904
- ['🔥 Preview v0.22x', '🌐 Multilingual v0.22', '🗣️ TTS v0.19', '📖 Long Form v0.19', 'ℹ️ About', '📁 Data', '📝 Changelog'],
905
  )
906
 
907
  if __name__ == '__main__':
 
334
  '🇨🇳 zh-CN': 'z',
335
  }
336
 
337
+ from gradio_client import Client
338
+ client = Client('hexgrad/kokoro-src', hf_token=os.environ['SRC'])
339
+ import json
340
+ ML_CHOICES = json.loads(client.predict(api_name='/list_voices'))
341
+ DEFAULT_VOICE = list(ML_CHOICES['a'].values())[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  def change_language(value):
343
  choices = list(ML_CHOICES[value].items())
344
  info = 'Missing British voices will be restored later' if value == 'b' else '⭐ voices are stable, 🧪 are unstable'
345
  return gr.Dropdown(choices, value=choices[0][1], label='Voice', info=info)
346
 
 
 
347
  def multilingual(text, voice, speed, trim, sk):
348
  if not text.strip():
349
  return None
 
354
  gr.Warning('Input may have been truncated')
355
  except Exception as e:
356
  print('📡', datetime.now(), text, voice, repr(e))
357
+ gr.Warning('v0.23 temporarily unavailable')
358
  gr.Info('Switching to v0.19')
359
  audio = generate(text, voice=voice, speed=speed, trim=trim, sk=sk)[0]
360
  return audio
 
365
  with gr.Row():
366
  with gr.Column():
367
  text = gr.Textbox(label='Input Text', info='Generate speech for one segment of text, up to ~500 characters')
368
+ voice = gr.Dropdown(list(ML_CHOICES['a'].items()), value=DEFAULT_VOICE, label='Voice', info='⭐ voices are stable, 🧪 are unstable')
369
  lang.change(fn=change_language, inputs=[lang], outputs=[voice])
370
  with gr.Row():
371
  random_btn = gr.Button('Random Text', variant='secondary')
 
380
  trim = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label='✂️ Trim', info='How much to cut from both ends')
381
  with gr.Row():
382
  gr.Markdown('''
383
+ 🎉 New! Kokoro v0.23 now supports 5 languages, including a new default voices. 🎉
384
 
385
  📡 Telemetry: For debugging purposes, the text you enter anywhere in this space may be printed to temporary logs, which are periodically wiped.
386
 
387
+ ⚠️ v0.23 does not yet support custom pronunciation, Long Form, or Voice Mixer. You can still use these features in v0.19.
388
 
389
  🇨🇳🇯🇵🇰🇷 Tokenizers for Chinese, Japanese, and Korean do not correctly handle English letters yet. Remove or convert them to CJK first.
390
  ''', container=True)
 
394
  text.submit(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
395
  generate_btn.click(multilingual, inputs=[text, voice, speed, trim, sk], outputs=[audio])
396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  USE_GPU_CHOICES = [('Auto 🔀', 'auto'), ('CPU 💬', False), ('ZeroGPU 📄', True)]
398
  USE_GPU_INFOS = {
399
  'auto': 'Use CPU or GPU, whichever is faster',
 
684
  '''
685
  with gr.Blocks() as changelog:
686
  gr.Markdown('''
687
+ **11 Dec 2024**<br/>
688
+ 🚀 Multilingual v0.23<br/>
689
+ 🗣️ 85 total voices
690
+
691
  **8 Dec 2024**<br/>
692
  🚀 Multilingual v0.22<br/>
693
  🌐 5 languages: English, Chinese, Japanese, Korean, French<br/>
 
757
 
758
  with gr.Blocks() as app:
759
  gr.TabbedInterface(
760
+ [ml_tts, basic_tts, lf_tts, about, data_card, changelog],
761
+ ['🔥 Latest v0.23', '🗣️ TTS v0.19', '📖 Long Form v0.19', 'ℹ️ About', '📁 Data', '📝 Changelog'],
762
  )
763
 
764
  if __name__ == '__main__':