Spaces:

pratikshahp
/

text-to-speech

Runtime error

pratikshahp committed on Jul 10, 2024

Commit

795d45e

verified ·

1 Parent(s): 4b57ca9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,24 +1,29 @@
-import streamlit as st
-from espnet2.bin.tts_inference import Text2Speech
-# Load the Text2Speech model
-model = Text2Speech.from_pretrained("kan-bayashi/ljspeech_fastspeech2")
-def generate_audio(text):
-    with st.spinner("Generating Speech..."):
-        speech, *_ = model(text)
-    return speech
-def main():
-    st.title("Text to Speech with ESPnet2")
-    text_input = st.text_area("Enter the text to generate speech:", "")
-    if st.button("Generate Speech"):
-        if text_input:
-            audio = generate_audio(text_input)
-            st.audio(audio, format="audio/wav")
-        else:
-            st.warning("Please enter some text.")
-if __name__ == "__main__":
-    main()

+import gradio as gr
+import torchaudio
+from speechbrain.inference.TTS import Tacotron2
+from speechbrain.inference.vocoders import HIFIGAN
+# Initialize Tacotron2 TTS model and HIFIGAN vocoder.
+# Tacotron2 is imported from speechbrain.inference.TTS, the import path documented
+# by SpeechBrain alongside speechbrain.inference.vocoders.
+# savedir is a relative directory: a leading "/" would place the download cache in
+# the filesystem root, which the app user normally cannot write to.
+tts_model = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tacotron2")
+hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_hifigan")
+# Sample rate (Hz) of the LJSpeech Tacotron2/HiFi-GAN checkpoints, per their model cards.
+SAMPLE_RATE = 22050
+# Function to generate speech
+def generate_speech(text):
+    """Synthesize speech for ``text`` and return it in the form gr.Audio accepts.
+
+    Args:
+        text: The sentence to speak, as typed into the Gradio textbox.
+
+    Returns:
+        A ``(sample_rate, samples)`` tuple, where ``samples`` is a 1-D NumPy
+        array holding the generated waveform.
+
+    Raises:
+        gr.Error: If ``text`` is empty or contains only whitespace.
+    """
+    # Reject blank input up front instead of feeding an empty sequence to the model.
+    if not text or not text.strip():
+        raise gr.Error("Please enter some text.")
+    # Encode text using Tacotron2. encode_text returns (mel, mel_lengths, alignments);
+    # only the mel spectrogram is needed, so the remaining values are discarded.
+    mel_output, *_ = tts_model.encode_text(text)
+    # Decode mel spectrogram to waveform using HIFIGAN vocoder
+    waveform = hifi_gan.decode_batch(mel_output)
+    # gr.Audio cannot play a torch tensor: drop the singleton batch/channel
+    # dimensions and hand back (sample_rate, numpy_array).
+    samples = waveform.squeeze().detach().cpu().numpy()
+    return SAMPLE_RATE, samples
+# Gradio UI: a single textbox feeding generate_speech, with an audio player for the result.
+input_box = gr.Textbox(
+    label="Input Text",
+    placeholder="Enter text to convert to speech...",
+)
+output_player = gr.Audio(label="Output Speech")
+iface = gr.Interface(fn=generate_speech, inputs=input_box, outputs=output_player)
+# Start serving the app.
+iface.launch()