Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -217,6 +217,8 @@ def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=4000
|
|
217 |
return ((SAMPLE_RATE, out), ps)
|
218 |
|
219 |
with gr.Blocks() as basic_tts:
|
|
|
|
|
220 |
with gr.Row():
|
221 |
with gr.Column():
|
222 |
text = gr.Textbox(label='Input Text')
|
@@ -396,6 +398,8 @@ def extract_text(file):
|
|
396 |
return None
|
397 |
|
398 |
with gr.Blocks() as lf_tts:
|
|
|
|
|
399 |
with gr.Row():
|
400 |
with gr.Column():
|
401 |
file_input = gr.File(file_types=['.pdf', '.txt'], label='Input File: pdf or txt')
|
@@ -438,10 +442,36 @@ with gr.Blocks() as lf_tts:
|
|
438 |
segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
|
439 |
generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
|
440 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
with gr.Blocks() as app:
|
442 |
gr.TabbedInterface(
|
443 |
-
[basic_tts, lf_tts],
|
444 |
-
['Basic TTS', 'Long-Form'],
|
445 |
)
|
446 |
|
447 |
if __name__ == '__main__':
|
|
|
217 |
return ((SAMPLE_RATE, out), ps)
|
218 |
|
219 |
with gr.Blocks() as basic_tts:
|
220 |
+
with gr.Row():
|
221 |
+
gr.Markdown('Generate speech for one segment of text (up to 510 tokens) using Kokoro, a TTS model with 80 million parameters.')
|
222 |
with gr.Row():
|
223 |
with gr.Column():
|
224 |
text = gr.Textbox(label='Input Text')
|
|
|
398 |
return None
|
399 |
|
400 |
with gr.Blocks() as lf_tts:
|
401 |
+
with gr.Row():
|
402 |
+
gr.Markdown('Generate speech in batches of 100 text segments and automatically join them together. This may exhaust your ZeroGPU quota.')
|
403 |
with gr.Row():
|
404 |
with gr.Column():
|
405 |
file_input = gr.File(file_types=['.pdf', '.txt'], label='Input File: pdf or txt')
|
|
|
442 |
segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
|
443 |
generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
|
444 |
|
445 |
+
with gr.Blocks() as api_info:
|
446 |
+
gr.Markdown("""
|
447 |
+
This Space can be used via API. The following code block can be copied and run in one Google Colab cell.
|
448 |
+
```
|
449 |
+
# 1. Install the Gradio Python client
|
450 |
+
!pip install -q gradio_client
|
451 |
+
|
452 |
+
# 2. Initialize the client
|
453 |
+
from gradio_client import Client
|
454 |
+
client = Client('hexgrad/Kokoro-TTS')
|
455 |
+
|
456 |
+
# 3. Call the generate endpoint, which returns a pair: an audio path and a string of output phonemes
|
457 |
+
audio_path, out_ps = client.predict(
|
458 |
+
text="How could I know? It's an unanswerable question. Like asking an unborn child if they'll lead a good life. They haven't even been born.",
|
459 |
+
voice='af_0',
|
460 |
+
api_name='/generate'
|
461 |
+
)
|
462 |
+
|
463 |
+
# 4. Display the audio and print the output phonemes
|
464 |
+
from IPython.display import display, Audio
|
465 |
+
display(Audio(audio_path))
|
466 |
+
print(out_ps)
|
467 |
+
```
|
468 |
+
Note that this Space and the underlying Kokoro model are both under development and subject to change. API reliability is not guaranteed. Also, Hugging Face and/or Gradio might enforce rate limits.
|
469 |
+
""")
|
470 |
+
|
471 |
with gr.Blocks() as app:
|
472 |
gr.TabbedInterface(
|
473 |
+
[basic_tts, lf_tts, api_info],
|
474 |
+
['Basic TTS', 'Long-Form', 'Gradio API'],
|
475 |
)
|
476 |
|
477 |
if __name__ == '__main__':
|