Noumida committed on
Commit
2d8a40a
·
verified ·
1 Parent(s): b9adc15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -23
app.py CHANGED
@@ -7,10 +7,10 @@ import requests
7
  import json
8
  import os
9
  import re
10
- from typing import List, Dict, Optional
11
  from transformers import AutoModel, AutoModelForAudioClassification, Wav2Vec2FeatureExtractor
12
 
13
- DESCRIPTION = "Question Generation"
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  # --- API Configuration ---
@@ -71,6 +71,18 @@ ASR_TO_INDICTRANS_MAP = {
71
  "te": "tel_Telu", "ur": "urd_Arab", "en": "eng_Latn"
72
  }
73
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  # --- Question Generation Functions ---
75
  def extract_sentences(text: str) -> List[str]:
76
  """Extract meaningful sentences from text."""
@@ -203,6 +215,44 @@ def translate_indic_to_english(text: str, source_lang: str = "hin_Deva") -> Dict
203
  "translated_text": ""
204
  }
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  @spaces.GPU
207
  def transcribe_audio_with_lid(audio_path):
208
  """Main function to transcribe audio with language detection, translation, and question generation."""
@@ -296,11 +346,11 @@ def transcribe_audio_with_lid(audio_path):
296
  with gr.Blocks(theme=gr.themes.Soft(), title="Indic STT + Translation + Questions") as demo:
297
  gr.Markdown(f"## {DESCRIPTION}")
298
  gr.Markdown("""
299
- 🎀 Upload or record audio in any of the 22 supported Indian languages.
300
 
301
  **Enhanced Features:**
302
- - 🔍 Automatic language detection
303
- - 📝 Speech-to-text transcription using RNNT model
304
  - 🌍 Automatic translation to English
305
  - ❓ AI-powered question generation from translated content
306
  - 🚀 Powered by AI4Bharat IndicConformer, IndicTrans2 & Google AI
@@ -308,31 +358,57 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Indic STT + Translation + Question
308
 
309
  with gr.Row():
310
  with gr.Column(scale=1):
 
 
 
 
 
 
 
 
311
  audio = gr.Audio(
312
  label="Upload or Record Audio",
313
  type="filepath",
314
- format="wav"
 
 
 
 
 
 
 
 
 
315
  )
316
- transcribe_btn = gr.Button(
317
- "🚀 Transcribe, Translate & Generate Questions",
 
 
 
 
 
 
 
 
 
318
  variant="primary",
319
  scale=2
320
  )
321
 
322
  with gr.Column(scale=2):
323
- # Language Detection
324
- detected_lang_output = gr.Label(
325
- label="🔍 Language Detection Result",
326
  show_label=True
327
  )
328
 
329
- # Transcription Results
330
- with gr.Tab("📝 Transcription"):
331
- gr.Markdown("### RNNT Transcription")
332
- rnnt_output = gr.Textbox(
333
  lines=4,
334
- label="Speech Recognition Output",
335
- placeholder="Transcription will appear here..."
336
  )
337
 
338
  # Translation Results
@@ -351,17 +427,43 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Indic STT + Translation + Question
351
  placeholder="AI-generated questions will appear here..."
352
  )
353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  # Event handlers
355
- transcribe_btn.click(
356
- fn=transcribe_audio_with_lid,
357
- inputs=[audio],
358
  outputs=[
359
- detected_lang_output,
360
- rnnt_output,
361
  translation_output,
362
  questions_output
363
  ],
364
- api_name="transcribe"
365
  )
366
 
367
  if __name__ == "__main__":
 
7
  import json
8
  import os
9
  import re
10
+ from typing import List, Dict, Optional, Union
11
  from transformers import AutoModel, AutoModelForAudioClassification, Wav2Vec2FeatureExtractor
12
 
13
+ DESCRIPTION = "Wav2Vec2_IndicConformer STT with Translation & Question Generation"
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  # --- API Configuration ---
 
71
  "te": "tel_Telu", "ur": "urd_Arab", "en": "eng_Latn"
72
  }
73
 
74
+ # Language dropdown options for text input
75
+ LANGUAGE_OPTIONS = {
76
+ "Hindi": "hin_Deva", "Bengali": "ben_Beng", "Telugu": "tel_Telu",
77
+ "Tamil": "tam_Taml", "Gujarati": "guj_Gujr", "Kannada": "kan_Knda",
78
+ "Malayalam": "mal_Mlym", "Marathi": "mar_Deva", "Punjabi": "pan_Guru",
79
+ "Odia": "ory_Orya", "Assamese": "asm_Beng", "Urdu": "urd_Arab",
80
+ "Nepali": "nep_Deva", "Sanskrit": "san_Deva", "Kashmiri": "kas_Deva",
81
+ "Sindhi": "snd_Arab", "Bodo": "brx_Deva", "Dogri": "doi_Deva",
82
+ "Konkani": "gom_Deva", "Maithili": "mai_Deva", "Manipuri": "mni_Beng",
83
+ "Santali": "sat_Olck"
84
+ }
85
+
86
  # --- Question Generation Functions ---
87
  def extract_sentences(text: str) -> List[str]:
88
  """Extract meaningful sentences from text."""
 
215
  "translated_text": ""
216
  }
217
 
218
+ # --- NEW: Text Processing Function ---
219
+ def process_text_input(text: str, language: str) -> tuple:
220
+ """Process direct text input for translation and question generation."""
221
+ if not text or not text.strip():
222
+ return "Please provide text input.", "", "", ""
223
+
224
+ # Get language code
225
+ lang_code = LANGUAGE_OPTIONS.get(language, "hin_Deva")
226
+
227
+ try:
228
+ # Translate to English
229
+ translation_response = translate_indic_to_english(text.strip(), lang_code)
230
+
231
+ if translation_response["success"]:
232
+ translation_result = translation_response["translated_text"]
233
+
234
+ # Generate questions from translated text
235
+ question_response = generate_google_ai_questions(translation_result)
236
+ if question_response["success"]:
237
+ questions_list = question_response["questions"]
238
+ questions_result = "\n".join([f"Q{i+1}: {q}" for i, q in enumerate(questions_list)])
239
+ else:
240
+ questions_result = f"❌ Question generation failed: {question_response.get('error', 'Unknown error')}"
241
+
242
+ else:
243
+ translation_result = f"❌ Translation failed: {translation_response['error']}"
244
+ questions_result = "Cannot generate questions without valid translation."
245
+
246
+ except Exception as e:
247
+ return f"Error processing text: {str(e)}", "", "", ""
248
+
249
+ return (
250
+ f"Text Input Processed (Language: {language})",
251
+ text.strip(),
252
+ translation_result,
253
+ questions_result
254
+ )
255
+
256
  @spaces.GPU
257
  def transcribe_audio_with_lid(audio_path):
258
  """Main function to transcribe audio with language detection, translation, and question generation."""
 
346
  with gr.Blocks(theme=gr.themes.Soft(), title="Indic STT + Translation + Questions") as demo:
347
  gr.Markdown(f"## {DESCRIPTION}")
348
  gr.Markdown("""
349
+ 🎀 Upload/record audio OR input text in any of the 22 supported Indian languages.
350
 
351
  **Enhanced Features:**
352
+ - 🔍 Automatic language detection (for audio)
353
+ - 📝 Speech-to-text transcription OR direct text input
354
  - 🌍 Automatic translation to English
355
  - ❓ AI-powered question generation from translated content
356
  - 🚀 Powered by AI4Bharat IndicConformer, IndicTrans2 & Google AI
 
358
 
359
  with gr.Row():
360
  with gr.Column(scale=1):
361
+ # Input method selection
362
+ input_method = gr.Radio(
363
+ choices=["Audio Input", "Text Input"],
364
+ value="Audio Input",
365
+ label="Choose Input Method"
366
+ )
367
+
368
+ # Audio input (visible by default)
369
  audio = gr.Audio(
370
  label="Upload or Record Audio",
371
  type="filepath",
372
+ format="wav",
373
+ visible=True
374
+ )
375
+
376
+ # Text input (hidden by default)
377
+ text_input = gr.Textbox(
378
+ label="Enter Text in Indian Language",
379
+ placeholder="Type your text here in Hindi, Bengali, Tamil, etc...",
380
+ lines=4,
381
+ visible=False
382
  )
383
+
384
+ # Language selection for text input (hidden by default)
385
+ language_dropdown = gr.Dropdown(
386
+ choices=list(LANGUAGE_OPTIONS.keys()),
387
+ value="Hindi",
388
+ label="Select Language",
389
+ visible=False
390
+ )
391
+
392
+ process_btn = gr.Button(
393
+ "🚀 Process & Generate Questions",
394
  variant="primary",
395
  scale=2
396
  )
397
 
398
  with gr.Column(scale=2):
399
+ # Detection/Processing Result
400
+ detection_output = gr.Label(
401
+ label="🔍 Processing Result",
402
  show_label=True
403
  )
404
 
405
+ # Input/Transcription Results
406
+ with gr.Tab("📝 Input/Transcription"):
407
+ gr.Markdown("### Original Text")
408
+ input_output = gr.Textbox(
409
  lines=4,
410
+ label="Input/Transcription Output",
411
+ placeholder="Original text will appear here..."
412
  )
413
 
414
  # Translation Results
 
427
  placeholder="AI-generated questions will appear here..."
428
  )
429
 
430
+ # Toggle input visibility based on method selection
431
+ def toggle_inputs(method):
432
+ if method == "Audio Input":
433
+ return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
434
+ else: # Text Input
435
+ return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
436
+
437
+ input_method.change(
438
+ fn=toggle_inputs,
439
+ inputs=[input_method],
440
+ outputs=[audio, text_input, language_dropdown]
441
+ )
442
+
443
+ # Main processing function that handles both audio and text
444
+ def process_input(method, audio_file, text, language):
445
+ if method == "Audio Input":
446
+ if audio_file:
447
+ return transcribe_audio_with_lid(audio_file)
448
+ else:
449
+ return "Please upload an audio file.", "", "", ""
450
+ else: # Text Input
451
+ if text:
452
+ return process_text_input(text, language)
453
+ else:
454
+ return "Please enter some text.", "", "", ""
455
+
456
  # Event handlers
457
+ process_btn.click(
458
+ fn=process_input,
459
+ inputs=[input_method, audio, text_input, language_dropdown],
460
  outputs=[
461
+ detection_output,
462
+ input_output,
463
  translation_output,
464
  questions_output
465
  ],
466
+ api_name="process"
467
  )
468
 
469
  if __name__ == "__main__":