Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -34,7 +34,6 @@ for key, state_dict in torch.load(os.path.join(snapshot, 'net.pth'), map_locatio
|
|
34 |
model[key].load_state_dict(state_dict, strict=False)
|
35 |
|
36 |
PARAM_COUNT = sum(p.numel() for value in model.values() for p in value.parameters())
|
37 |
-
print('PARAM_COUNT', PARAM_COUNT)
|
38 |
assert PARAM_COUNT < 82_000_000, PARAM_COUNT
|
39 |
|
40 |
random_texts = {}
|
@@ -442,6 +441,36 @@ with gr.Blocks() as lf_tts:
|
|
442 |
segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
|
443 |
generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
|
444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
445 |
with gr.Blocks() as api_info:
|
446 |
gr.Markdown("""
|
447 |
This Space can be used via API. The following code block can be copied and run in one Google Colab cell.
|
@@ -465,13 +494,13 @@ from IPython.display import display, Audio
|
|
465 |
display(Audio(audio_path))
|
466 |
print(out_ps)
|
467 |
```
|
468 |
-
Note that this Space and the underlying Kokoro model are both under development and subject to change.
|
469 |
""")
|
470 |
|
471 |
with gr.Blocks() as app:
|
472 |
gr.TabbedInterface(
|
473 |
-
[basic_tts, lf_tts, api_info],
|
474 |
-
['Basic TTS', 'Long-Form', 'Gradio API'],
|
475 |
)
|
476 |
|
477 |
if __name__ == '__main__':
|
|
|
34 |
model[key].load_state_dict(state_dict, strict=False)
|
35 |
|
36 |
PARAM_COUNT = sum(p.numel() for value in model.values() for p in value.parameters())
|
|
|
37 |
assert PARAM_COUNT < 82_000_000, PARAM_COUNT
|
38 |
|
39 |
random_texts = {}
|
|
|
441 |
segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
|
442 |
generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
|
443 |
|
444 |
+
with gr.Blocks() as about:
|
445 |
+
gr.Markdown("""
|
446 |
+
Kokoro is a frontier TTS model for its size. It has 80 million parameters,<sup>[1]</sup> uses a lean StyleTTS 2 architecture,<sup>[2]</sup> and was trained on high-quality data.
|
447 |
+
|
448 |
+
The weights are currently private, but a free public demo is hosted at https://hf.co/spaces/hexgrad/Kokoro-TTS
|
449 |
+
|
450 |
+
### Compute
|
451 |
+
The model was trained on 1x A100-class 80GB instances rented from [Vast.ai](https://cloud.vast.ai/?ref_id=79907).<sup>[3]</sup> Vast was selected over other compute providers due to its competitive on-demand hourly rates. The average hourly cost for the 1x A100-class 80GB VRAM instances used for training was below $1/hr — around half the quoted rates from other providers.
|
452 |
+
|
453 |
+
### Updates
|
454 |
+
This Space and the underlying Kokoro model are both under development and subject to change.
|
455 |
+
Last model update: 2024 Nov 15
|
456 |
+
Model trained by: Raven (@rzvzn on Discord)
|
457 |
+
|
458 |
+
### Licenses
|
459 |
+
Inference code: MIT
|
460 |
+
espeak-ng dependency: GPL-3.0<sup>[4]</sup>
|
461 |
+
Random English texts: Unknown<sup>[5]</sup>
|
462 |
+
Random Japanese texts: CC0 public domain<sup>[6]</sup>
|
463 |
+
Kokoro model weights: N/A
|
464 |
+
|
465 |
+
### References
|
466 |
+
1. Kokoro parameter count | https://hf.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L37
|
467 |
+
2. StyleTTS 2 | https://github.com/yl4579/StyleTTS2
|
468 |
+
3. Vast.ai referral link | https://cloud.vast.ai/?ref_id=79907
|
469 |
+
4. eSpeak NG | https://github.com/espeak-ng/espeak-ng
|
470 |
+
5. Quotable Data | https://github.com/quotable-io/data/blob/master/data/quotes.json
|
471 |
+
6. Common Voice Japanese sentences | https://github.com/common-voice/common-voice/tree/main/server/data/ja
|
472 |
+
""")
|
473 |
+
|
474 |
with gr.Blocks() as api_info:
|
475 |
gr.Markdown("""
|
476 |
This Space can be used via API. The following code block can be copied and run in one Google Colab cell.
|
|
|
494 |
display(Audio(audio_path))
|
495 |
print(out_ps)
|
496 |
```
|
497 |
+
Note that this Space and the underlying Kokoro model are both under development and subject to change. Reliability is not guaranteed. Hugging Face and/or Gradio might enforce their own rate limits.
|
498 |
""")
|
499 |
|
500 |
with gr.Blocks() as app:
|
501 |
gr.TabbedInterface(
|
502 |
+
[basic_tts, lf_tts, about, api_info],
|
503 |
+
['Basic TTS', 'Long-Form', 'About', 'Gradio API'],
|
504 |
)
|
505 |
|
506 |
if __name__ == '__main__':
|