Spaces:

vicharanashala
/

Voice_recommendation_system

Running

App Files Community

Noumida committed on Sep 10

Commit

2d8a40a

verified ·

1 Parent(s): b9adc15

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -23

app.py CHANGED Viewed

@@ -7,10 +7,10 @@ import requests
 import json
 import os
 import re
-from typing import List, Dict, Optional
 from transformers import AutoModel, AutoModelForAudioClassification, Wav2Vec2FeatureExtractor
-DESCRIPTION = "Question Generation"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # --- API Configuration ---
@@ -71,6 +71,18 @@ ASR_TO_INDICTRANS_MAP = {
     "te": "tel_Telu", "ur": "urd_Arab", "en": "eng_Latn"
 }
 # --- Question Generation Functions ---
 def extract_sentences(text: str) -> List[str]:
     """Extract meaningful sentences from text."""
@@ -203,6 +215,44 @@ def translate_indic_to_english(text: str, source_lang: str = "hin_Deva") -> Dict
             "translated_text": ""
         }
 @spaces.GPU
 def transcribe_audio_with_lid(audio_path):
     """Main function to transcribe audio with language detection, translation, and question generation."""
@@ -296,11 +346,11 @@ def transcribe_audio_with_lid(audio_path):
 with gr.Blocks(theme=gr.themes.Soft(), title="Indic STT + Translation + Questions") as demo:
     gr.Markdown(f"## {DESCRIPTION}")
     gr.Markdown("""
-    🎤 Upload or record audio in any of the 22 supported Indian languages.
     **Enhanced Features:**
-    - 🔍 Automatic language detection
-    - 📝 Speech-to-text transcription using RNNT model
     - 🌍 Automatic translation to English
     - ❓ AI-powered question generation from translated content
     - 🚀 Powered by AI4Bharat IndicConformer, IndicTrans2 & Google AI
@@ -308,31 +358,57 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Indic STT + Translation + Question
     with gr.Row():
         with gr.Column(scale=1):
             audio = gr.Audio(
                 label="Upload or Record Audio",
                 type="filepath",
-                format="wav"
             )
-            transcribe_btn = gr.Button(
-                "🚀 Transcribe, Translate & Generate Questions",
                 variant="primary",
                 scale=2
             )
         with gr.Column(scale=2):
-            # Language Detection
-            detected_lang_output = gr.Label(
-                label="🔍 Language Detection Result",
                 show_label=True
             )
-            # Transcription Results
-            with gr.Tab("📝 Transcription"):
-                gr.Markdown("### RNNT Transcription")
-                rnnt_output = gr.Textbox(
                     lines=4,
-                    label="Speech Recognition Output",
-                    placeholder="Transcription will appear here..."
                 )
             # Translation Results
@@ -351,17 +427,43 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Indic STT + Translation + Question
                     placeholder="AI-generated questions will appear here..."
                 )
     # Event handlers
-    transcribe_btn.click(
-        fn=transcribe_audio_with_lid,
-        inputs=[audio],
         outputs=[
-            detected_lang_output,
-            rnnt_output,
             translation_output,
             questions_output
         ],
-        api_name="transcribe"
     )
 if __name__ == "__main__":

 import json
 import os
 import re
+from typing import List, Dict, Optional, Union
 from transformers import AutoModel, AutoModelForAudioClassification, Wav2Vec2FeatureExtractor
+DESCRIPTION = "Wav2Vec2_IndicConformer STT with Translation & Question Generation"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # --- API Configuration ---
     "te": "tel_Telu", "ur": "urd_Arab", "en": "eng_Latn"
 }
+# Language dropdown options for text input.
+# Keys are the display names offered as choices in the language dropdown;
+# values are the language codes handed to translate_indic_to_english() as
+# the source language by process_text_input(). A name that is not present
+# here falls back to "hin_Deva" in process_text_input().
+LANGUAGE_OPTIONS = {
+    "Hindi": "hin_Deva", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
+    "Tamil": "tam_Taml", "Gujarati": "guj_Gujr", "Kannada": "kan_Knda",
+    "Malayalam": "mal_Mlym", "Marathi": "mar_Deva", "Punjabi": "pan_Guru",
+    "Odia": "ory_Orya", "Assamese": "asm_Beng", "Urdu": "urd_Arab",
+    "Nepali": "nep_Deva", "Sanskrit": "san_Deva", "Kashmiri": "kas_Deva",
+    "Sindhi": "snd_Arab", "Bodo": "brx_Deva", "Dogri": "doi_Deva",
+    "Konkani": "gom_Deva", "Maithili": "mai_Deva", "Manipuri": "mni_Beng",
+    "Santali": "sat_Olck"
+}
 # --- Question Generation Functions ---
 def extract_sentences(text: str) -> List[str]:
     """Extract meaningful sentences from text."""
             "translated_text": ""
         }
+# --- NEW: Text Processing Function ---
+def process_text_input(text: str, language: str) -> tuple:
+    """Translate directly entered text to English and generate questions from it.
+
+    Args:
+        text: Text typed by the user; surrounding whitespace is stripped.
+        language: Display name of the source language, looked up in
+            LANGUAGE_OPTIONS. Unknown names fall back to "hin_Deva".
+
+    Returns:
+        A 4-tuple of strings: (status message, stripped input text,
+        translation or failure notice, numbered questions or failure notice).
+        For blank input or an unexpected exception the last three items are
+        empty strings and the first item carries the message.
+    """
+    if not text or not text.strip():
+        return "Please provide text input.", "", "", ""
+    cleaned_text = text.strip()
+    # Get language code
+    lang_code = LANGUAGE_OPTIONS.get(language, "hin_Deva")
+    try:
+        # Translate to English
+        translation_response = translate_indic_to_english(cleaned_text, lang_code)
+        if translation_response["success"]:
+            translation_result = translation_response["translated_text"]
+            # Generate questions from translated text
+            question_response = generate_google_ai_questions(translation_result)
+            if question_response["success"]:
+                questions_list = question_response["questions"]
+                questions_result = "\n".join(
+                    f"Q{i}: {q}" for i, q in enumerate(questions_list, start=1)
+                )
+            else:
+                questions_result = f"❌ Question generation failed: {question_response.get('error', 'Unknown error')}"
+        else:
+            # Read the error with .get(), matching the question-generation
+            # branch: a failure response without an "error" key should still
+            # be reported as a translation failure instead of surfacing as a
+            # KeyError through the generic handler below.
+            translation_result = f"❌ Translation failed: {translation_response.get('error', 'Unknown error')}"
+            questions_result = "Cannot generate questions without valid translation."
+    except Exception as e:
+        # UI boundary: report any unexpected failure in the status slot
+        # rather than letting the exception propagate to the caller.
+        return f"Error processing text: {str(e)}", "", "", ""
+    return (
+        f"Text Input Processed (Language: {language})",
+        cleaned_text,
+        translation_result,
+        questions_result
+    )
 @spaces.GPU
 def transcribe_audio_with_lid(audio_path):
     """Main function to transcribe audio with language detection, translation, and question generation."""
 with gr.Blocks(theme=gr.themes.Soft(), title="Indic STT + Translation + Questions") as demo:
     gr.Markdown(f"## {DESCRIPTION}")
     gr.Markdown("""
+    🎤 Upload/record audio OR input text in any of the 22 supported Indian languages.
     **Enhanced Features:**
+    - 🔍 Automatic language detection (for audio)
+    - 📝 Speech-to-text transcription OR direct text input
     - 🌍 Automatic translation to English
     - ❓ AI-powered question generation from translated content
     - 🚀 Powered by AI4Bharat IndicConformer, IndicTrans2 & Google AI
     with gr.Row():
         with gr.Column(scale=1):
+            # Input method selection
+            input_method = gr.Radio(
+                choices=["Audio Input", "Text Input"],
+                value="Audio Input",
+                label="Choose Input Method"
+            )
+            # Audio input (visible by default)
             audio = gr.Audio(
                 label="Upload or Record Audio",
                 type="filepath",
+                format="wav",
+                visible=True
+            )
+            # Text input (hidden by default)
+            text_input = gr.Textbox(
+                label="Enter Text in Indian Language",
+                placeholder="Type your text here in Hindi, Bengali, Tamil, etc...",
+                lines=4,
+                visible=False
             )
+            # Language selection for text input (hidden by default)
+            language_dropdown = gr.Dropdown(
+                choices=list(LANGUAGE_OPTIONS.keys()),
+                value="Hindi",
+                label="Select Language",
+                visible=False
+            )
+            process_btn = gr.Button(
+                "🚀 Process & Generate Questions",
                 variant="primary",
                 scale=2
             )
         with gr.Column(scale=2):
+            # Detection/Processing Result
+            detection_output = gr.Label(
+                label="🔍 Processing Result",
                 show_label=True
             )
+            # Input/Transcription Results
+            with gr.Tab("📝 Input/Transcription"):
+                gr.Markdown("### Original Text")
+                input_output = gr.Textbox(
                     lines=4,
+                    label="Input/Transcription Output",
+                    placeholder="Original text will appear here..."
                 )
             # Translation Results
                     placeholder="AI-generated questions will appear here..."
                 )
+    # Toggle input visibility based on method selection
+    def toggle_inputs(method):
+        """Return visibility updates for the audio, text and language widgets.
+
+        The audio widget is shown only when ``method`` is "Audio Input";
+        for any other value the text box and language dropdown are shown
+        instead. The three updates are returned in that order.
+        """
+        audio_selected = method == "Audio Input"
+        text_selected = not audio_selected
+        return (
+            gr.update(visible=audio_selected),
+            gr.update(visible=text_selected),
+            gr.update(visible=text_selected),
+        )
+    input_method.change(
+        fn=toggle_inputs,
+        inputs=[input_method],
+        outputs=[audio, text_input, language_dropdown]
+    )
+    # Main processing function that handles both audio and text
+    def process_input(method, audio_file, text, language):
+        """Dispatch the request to the audio or the text pipeline.
+
+        When ``method`` is "Audio Input" the audio file is passed to
+        transcribe_audio_with_lid(); any other value is treated as text
+        input and handled by process_text_input(). A missing input yields
+        a prompt message followed by three empty strings.
+        """
+        if method == "Audio Input":
+            if not audio_file:
+                return "Please upload an audio file.", "", "", ""
+            return transcribe_audio_with_lid(audio_file)
+        # Every other method value is handled as text input.
+        if not text:
+            return "Please enter some text.", "", "", ""
+        return process_text_input(text, language)
     # Event handlers
+    process_btn.click(
+        fn=process_input,
+        inputs=[input_method, audio, text_input, language_dropdown],
         outputs=[
+            detection_output,
+            input_output,
             translation_output,
             questions_output
         ],
+        api_name="process"
     )
 if __name__ == "__main__":