hexgrad committed on
Commit
8fe5320
·
verified ·
1 Parent(s): 4b9083a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -4
app.py CHANGED
@@ -34,7 +34,6 @@ for key, state_dict in torch.load(os.path.join(snapshot, 'net.pth'), map_locatio
34
  model[key].load_state_dict(state_dict, strict=False)
35
 
36
  PARAM_COUNT = sum(p.numel() for value in model.values() for p in value.parameters())
37
- print('PARAM_COUNT', PARAM_COUNT)
38
  assert PARAM_COUNT < 82_000_000, PARAM_COUNT
39
 
40
  random_texts = {}
@@ -442,6 +441,36 @@ with gr.Blocks() as lf_tts:
442
  segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
443
  generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  with gr.Blocks() as api_info:
446
  gr.Markdown("""
447
  This Space can be used via API. The following code block can be copied and run in one Google Colab cell.
@@ -465,13 +494,13 @@ from IPython.display import display, Audio
465
  display(Audio(audio_path))
466
  print(out_ps)
467
  ```
468
- Note that this Space and the underlying Kokoro model are both under development and subject to change. API reliability is not guaranteed. Also, Hugging Face and/or Gradio might enforce rate limits.
469
  """)
470
 
471
  with gr.Blocks() as app:
472
  gr.TabbedInterface(
473
- [basic_tts, lf_tts, api_info],
474
- ['Basic TTS', 'Long-Form', 'Gradio API'],
475
  )
476
 
477
  if __name__ == '__main__':
 
34
  model[key].load_state_dict(state_dict, strict=False)
35
 
36
  PARAM_COUNT = sum(p.numel() for value in model.values() for p in value.parameters())
 
37
  assert PARAM_COUNT < 82_000_000, PARAM_COUNT
38
 
39
  random_texts = {}
 
441
  segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
442
  generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
443
 
444
+ with gr.Blocks() as about:
445
+ gr.Markdown("""
446
+ Kokoro is a frontier TTS model for its size. It has 80 million parameters,<sup>[1]</sup> uses a lean StyleTTS 2 architecture,<sup>[2]</sup> and was trained on high-quality data.
447
+
448
+ The weights are currently private, but a free public demo is hosted at https://hf.co/spaces/hexgrad/Kokoro-TTS
449
+
450
+ ### Compute
451
+ The model was trained on 1x A100-class 80GB instances rented from [Vast.ai](https://cloud.vast.ai/?ref_id=79907).<sup>[3]</sup> Vast was selected over other compute providers due to its competitive on-demand hourly rates. The average hourly cost for the 1x A100-class 80GB VRAM instances used for training was below $1/hr — around half the quoted rates from other providers.
452
+
453
+ ### Updates
454
+ This Space and the underlying Kokoro model are both under development and subject to change.
455
+ Last model update: 2024 Nov 15
456
+ Model trained by: Raven (@rzvzn on Discord)
457
+
458
+ ### Licenses
459
+ Inference code: MIT
460
+ espeak-ng dependency: GPL-3.0<sup>[4]</sup>
461
+ Random English texts: Unknown<sup>[5]</sup>
462
+ Random Japanese texts: CC0 public domain<sup>[6]</sup>
463
+ Kokoro model weights: N/A
464
+
465
+ ### References
466
+ 1. Kokoro parameter count | https://hf.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L37
467
+ 2. StyleTTS 2 | https://github.com/yl4579/StyleTTS2
468
+ 3. Vast.ai referral link | https://cloud.vast.ai/?ref_id=79907
469
+ 4. eSpeak NG | https://github.com/espeak-ng/espeak-ng
470
+ 5. Quotable Data | https://github.com/quotable-io/data/blob/master/data/quotes.json
471
+ 6. Common Voice Japanese sentences | https://github.com/common-voice/common-voice/tree/main/server/data/ja
472
+ """)
473
+
474
  with gr.Blocks() as api_info:
475
  gr.Markdown("""
476
  This Space can be used via API. The following code block can be copied and run in one Google Colab cell.
 
494
  display(Audio(audio_path))
495
  print(out_ps)
496
  ```
497
+ Note that this Space and the underlying Kokoro model are both under development and subject to change. Reliability is not guaranteed. Hugging Face and/or Gradio might enforce their own rate limits.
498
  """)
499
 
500
  with gr.Blocks() as app:
501
  gr.TabbedInterface(
502
+ [basic_tts, lf_tts, about, api_info],
503
+ ['Basic TTS', 'Long-Form', 'About', 'Gradio API'],
504
  )
505
 
506
  if __name__ == '__main__':