hexgrad committed on
Commit
4b9083a
·
verified ·
1 Parent(s): f5fbf2c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -2
app.py CHANGED
@@ -217,6 +217,8 @@ def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=4000
217
  return ((SAMPLE_RATE, out), ps)
218
 
219
  with gr.Blocks() as basic_tts:
 
 
220
  with gr.Row():
221
  with gr.Column():
222
  text = gr.Textbox(label='Input Text')
@@ -396,6 +398,8 @@ def extract_text(file):
396
  return None
397
 
398
  with gr.Blocks() as lf_tts:
 
 
399
  with gr.Row():
400
  with gr.Column():
401
  file_input = gr.File(file_types=['.pdf', '.txt'], label='Input File: pdf or txt')
@@ -438,10 +442,36 @@ with gr.Blocks() as lf_tts:
438
  segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
439
  generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  with gr.Blocks() as app:
442
  gr.TabbedInterface(
443
- [basic_tts, lf_tts],
444
- ['Basic TTS', 'Long-Form'],
445
  )
446
 
447
  if __name__ == '__main__':
 
217
  return ((SAMPLE_RATE, out), ps)
218
 
219
  with gr.Blocks() as basic_tts:
220
+ with gr.Row():
221
+ gr.Markdown('Generate speech for one segment of text (up to 510 tokens) using Kokoro, a TTS model with 80 million parameters.')
222
  with gr.Row():
223
  with gr.Column():
224
  text = gr.Textbox(label='Input Text')
 
398
  return None
399
 
400
  with gr.Blocks() as lf_tts:
401
+ with gr.Row():
402
+ gr.Markdown('Generate speech in batches of 100 text segments and automatically join them together. This may exhaust your ZeroGPU quota.')
403
  with gr.Row():
404
  with gr.Column():
405
  file_input = gr.File(file_types=['.pdf', '.txt'], label='Input File: pdf or txt')
 
442
  segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
443
  generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
444
 
445
+ with gr.Blocks() as api_info:
446
+ gr.Markdown("""
447
+ This Space can be used via API. The following code block can be copied and run in one Google Colab cell.
448
+ ```
449
+ # 1. Install the Gradio Python client
450
+ !pip install -q gradio_client
451
+
452
+ # 2. Initialize the client
453
+ from gradio_client import Client
454
+ client = Client('hexgrad/Kokoro-TTS')
455
+
456
+ # 3. Call the generate endpoint, which returns a pair: an audio path and a string of output phonemes
457
+ audio_path, out_ps = client.predict(
458
+ text="How could I know? It's an unanswerable question. Like asking an unborn child if they'll lead a good life. They haven't even been born.",
459
+ voice='af_0',
460
+ api_name='/generate'
461
+ )
462
+
463
+ # 4. Display the audio and print the output phonemes
464
+ from IPython.display import display, Audio
465
+ display(Audio(audio_path))
466
+ print(out_ps)
467
+ ```
468
+ Note that this Space and the underlying Kokoro model are both under development and subject to change. API reliability is not guaranteed. Also, Hugging Face and/or Gradio might enforce rate limits.
469
+ """)
470
+
471
  with gr.Blocks() as app:
472
  gr.TabbedInterface(
473
+ [basic_tts, lf_tts, api_info],
474
+ ['Basic TTS', 'Long-Form', 'Gradio API'],
475
  )
476
 
477
  if __name__ == '__main__':