Spaces:

Jedi09
/

voice_to_text_gemini

Running

App Files Files Community

Jedi09 committed 6 days ago

Commit

29a8059

verified ·

1 Parent(s): d9ceebc

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -204

app.py CHANGED Viewed

@@ -8,180 +8,104 @@ import gradio as gr
 from faster_whisper import WhisperModel
 import tempfile
 import time
-import requests
 import os
-# ==================== CONFIGURATION ====================
-MODEL_SIZE = "medium"  # Options: tiny, base, small, medium, large-v3
-# =======================================================
-# Load model once at startup
-print(f"🔄 Model yükleniyor ({MODEL_SIZE})... (Bu işlem birkaç dakika sürebilir)")
 try:
-    model = WhisperModel(
-        MODEL_SIZE,
-        device="cpu",
-        compute_type="int8"
-    )
-    print("✅ Model yüklendi!")
 except Exception as e:
-    print(f"❌ Model yükleme hatası: {e}")
     model = None
-# ==================== AI PROVIDER UTILS ====================
-def call_gemini_with_retry(url, payload, max_retries=3):
-    """
-    Gemini API: Rate limit (429) ve Bağlantı hatalarını yönetir.
-    """
-    for attempt in range(max_retries):
-        try:
-            response = requests.post(url, json=payload, timeout=60)
-            if response.status_code == 200:
-                result = response.json()
-                if "candidates" in result and len(result["candidates"]) > 0:
-                    candidate = result["candidates"][0]
-                    if "content" in candidate and "parts" in candidate["content"]:
-                        parts = candidate["content"]["parts"]
-                        if len(parts) > 0 and "text" in parts[0]:
-                            return parts[0]["text"]
-                return "❌ Beklenmedik API yanıt formatı."
-            elif response.status_code == 429:
-                wait_time = 5 * (2 ** attempt)
-                print(f"⚠️ API yoğun (429), {wait_time} saniye bekleniyor... (Deneme {attempt+1}/{max_retries})")
-                time.sleep(wait_time)
-                continue
-            else:
-                return f"❌ Gemini Hatası: {response.status_code} - {response.text}"
-        except Exception as e:
-            return f"❌ Bağlantı Hatası: {str(e)}"
-    return "⚠️ Gemini sunucuları çok yoğun. Lütfen Hugging Face deneyin."
-def call_huggingface_api(prompt, api_key):
-    """
-    Hugging Face Inference API (Standart/Legacy).
-    Standart 'Read' token ile çalışır. Öze ve basit yapıyı kullanır.
-    """
-    if not api_key.startswith("hf_"):
-        return "⚠️ Geçersiz HF Token. 'hf_' ile başlamalıdır."
-    # Free Tier için en sorunsuz çalışan modeller (Küçük ve Hızlı)
-    models_to_try = [
-        "HuggingFaceH4/zephyr-7b-beta",
-        "microsoft/Phi-3-mini-4k-instruct",
-        "mistralai/Mistral-7B-Instruct-v0.2"
-    ]
-    headers = {"Authorization": f"Bearer {api_key}"}
-    for model in models_to_try:
-        # Standart Inference Endpoint (Daha geniş uyumluluk)
-        url = f"https://api-inference.huggingface.co/models/{model}"
-        # Zephyr/Mistral için prompt formatı
-        formatted_prompt = f"<|system|>\nSen yardımsever bir asistansın.\n<|user|>\n{prompt}\n<|assistant|>\n"
-        payload = {
-            "inputs": formatted_prompt,
-            "parameters": {
-                "max_new_tokens": 512,
-                "return_full_text": False,
-                "temperature": 0.7
-            }
-        }
-        try:
-            print(f"📡 HF Deneniyor (Legacy): {model}...")
-            response = requests.post(url, headers=headers, json=payload, timeout=60)
-            if response.status_code == 200:
-                result = response.json()
-                # Standart liste yanıtı: [{'generated_text': '...'}]
-                if isinstance(result, list) and len(result) > 0 and "generated_text" in result[0]:
-                    return result[0]["generated_text"].strip()
-                # Bazen sözlük dönebilir
-                elif isinstance(result, dict) and "generated_text" in result:
-                     return result["generated_text"].strip()
-            elif response.status_code in [404, 503, 500]:
-                print(f"⚠️ {model} sunucusu yanıt vermedi ({response.status_code})...")
-                continue
-            elif response.status_code == 401:
-                return "❌ Yetkisiz (401). Token hatalı veya 'Read' izni yok."
-            else:
-                print(f"⚠️ Hata ({model}): {response.status_code}")
-                continue
-        except Exception as e:
-            print(f"⚠️ Bağlantı hatası ({model}): {e}")
-            continue
-    return "❌ Hiçbir model yanıt vermedi. Lütfen internetinizi kontrol edin veya daha sonra deneyin."
-# ==================== GENERIC AI INTERFACE ====================
-def summarize_with_ai(text: str, api_key: str, provider: str, custom_prompt: str = "") -> str:
-    """Seçilen sağlayıcı ile metni özetler."""
-    user_key = api_key.strip() if api_key else os.environ.get("GEMINI_API_KEY") if provider == "Google Gemini" else os.environ.get("HF_TOKEN")
-    if not user_key: return f"⚠️ {provider} API Anahtarı bulunamadı."
     if not text or "⚠️" in text: return "⚠️ Önce geçerli bir metin oluşturun."
     clean_text = text.split("───────────────────────────────────")[0].strip()
-    if provider == "Google Gemini":
-        base_instruction = "Aşağıdaki Türkçe metni analiz et, ana başlıkları çıkar ve detaylıca özetle."
-        full_prompt = f"{custom_prompt if custom_prompt else base_instruction}\n\nMetin:\n{clean_text}"
-        # Gemini 2.0 Flash
-        url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={user_key}"
-        headers = {"Content-Type": "application/json"}
-        payload = {
-            "contents": [{"parts": [{"text": full_prompt}]}],
-            "generationConfig": {"temperature": 0.7, "maxOutputTokens": 2048}
-        }
-        return call_gemini_with_retry(url, payload)
-    else: # Hugging Face
-        base_instruction = "Aşağıdaki metni Türkçe olarak maddeler halinde özetle:"
-        full_prompt = f"{custom_prompt if custom_prompt else base_instruction}\n\n{clean_text}"
-        return call_huggingface_api(full_prompt, user_key)
-def translate_with_ai(text: str, api_key: str, provider: str, target_language: str) -> str:
-    """Seçilen sağlayıcı ile çeviri yapar."""
-    user_key = api_key.strip() if api_key else os.environ.get("GEMINI_API_KEY") if provider == "Google Gemini" else os.environ.get("HF_TOKEN")
-    if not user_key: return f"⚠️ {provider} API Anahtarı eksik."
     clean_text = text.split("───────────────────────────────────")[0].strip()
-    target_lang_eng = {"Türkçe": "Turkish", "İngilizce": "English", "Almanca": "German", "Fransızca": "French"}.get(target_language, "English")
-    request_text = f"Translate the following text to {target_lang_eng}. Only provide the translation, no extra text.\n\nText:\n{clean_text}"
-    if provider == "Google Gemini":
-        url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={user_key}"
-        headers = {"Content-Type": "application/json"}
-        payload = {
-            "contents": [{"parts": [{"text": request_text}]}],
-            "generationConfig": {"temperature": 0.3, "maxOutputTokens": 4096}
-        }
-        return call_gemini_with_retry(url, payload)
-    else: # Hugging Face
-        return call_huggingface_api(request_text, user_key)
 def transcribe(audio_path: str, progress=gr.Progress()):
     if model is None:
-        yield "❌ Hata: Model yüklenemedi.", None
         return
     if audio_path is None:
@@ -192,110 +116,88 @@ def transcribe(audio_path: str, progress=gr.Progress()):
         start_time = time.time()
         progress(0, desc="Ses işleniyor...")
-        # 1. Transkripsiyon (Hızlandırılmış Ayarlar)
         segments, info = model.transcribe(
             audio_path,
             language="tr",
-            beam_size=1,      # Hız için 1 (Streaming ile uyumlu)
-            vad_filter=True,  # Sessizliği atla
             word_timestamps=False
         )
         duration = info.duration
         full_text = ""
-        # 2. Streaming Döngüsü
         for segment in segments:
             full_text += segment.text + " "
-            # İlerleme Çubuğu
             if duration > 0:
                 prog = min(segment.end / duration, 0.99)
-                progress(prog, desc=f"Çevriliyor... ({int(segment.end)}/{int(duration)} sn)")
-            # Anlık Çıktı (Henüz dosya yok)
             yield full_text.strip(), None
         elapsed = time.time() - start_time
         final_result = full_text.strip()
         if not final_result:
-            yield "⚠️ Ses anlaşılamadı.", None
             return
-        # 3. Dosya Kaydetme
         progress(0.99, desc="Dosya kaydediliyor...")
-        txt_file = tempfile.NamedTemporaryFile(
-            mode='w', suffix='.txt', delete=False, encoding='utf-8'
-        )
         txt_file.write(final_result)
         txt_file.close()
-        # İstatistik Ekleme
         stats = f"\n\n───────────────────────────────────\n📊 İstatistikler\n• Süre: {duration:.1f} sn\n• İşlem: {elapsed:.1f} sn\n• Hız: {duration/elapsed:.1f}x\n───────────────────────────────────"
         yield final_result + stats, txt_file.name
     except Exception as e:
-        yield f"❌ Hata: {str(e)}", None
-# --- ARAYÜZ ---
-with gr.Blocks(title="Ses Deşifre Pro") as demo:
     gr.HTML("""
         <style>
             footer { display: none !important; }
             .gradio-container { max-width: 900px !important; margin: auto !important; }
         </style>
-        <div style="text-align: center; padding: 30px; background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%); border-radius: 20px; margin-bottom: 20px; color: white;">
-            <h1 style="font-size: 2.2rem; margin: 0;">🎙️ Ses Deşifre & AI Asistan</h1>
-            <p style="opacity: 0.9;">Canlı Transkripsiyon • Multi-Model AI (Gemini + HF) • Streaming</p>
         </div>
     """)
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(label="Ses Kaynağı", type="filepath", sources=["upload", "microphone"])
-            submit_btn = gr.Button("✨ Başlat (Canlı)", variant="primary", size="lg")
     with gr.Row():
         with gr.Column():
-            output_text = gr.Textbox(label="Sonuç (Canlı Akış)", placeholder="Konuşmalar buraya akacak...", lines=12, interactive=False)
-            download_file = gr.File(label="İndir (.txt)")
-    # --- AI ARAÇLARI ---
-    gr.HTML("<h3 style='margin-top: 20px; border-bottom: 1px solid #ddd; padding-bottom: 10px;'>🧠 Yapay Zeka Araçları</h3>")
-    with gr.Row():
-        with gr.Column(scale=1):
-            provider_select = gr.Radio(
-                ["Google Gemini", "Hugging Face (Bedava)"],
-                label="AI Sağlayıcısı",
-                value="Google Gemini",
-                info="Gemini limit hatası verirse Hugging Face seçin."
-            )
-        with gr.Column(scale=2):
-            api_key_input = gr.Textbox(
-                label="🔑 API Anahtarı (Gemini Key veya HF Token)",
-                placeholder="Seçili sağlayıcıya ait anahtar...",
-                type="password"
-            )
     with gr.Tabs():
-        with gr.TabItem("✨ Özetle"):
-            gemini_prompt = gr.Textbox(label="Komut (Opsiyonel)", placeholder="Örn: Madde madde özetle...")
-            gemini_btn = gr.Button("🤖 AI ile Özetle")
-            gemini_output = gr.Textbox(label="Özet", lines=8)
-        with gr.TabItem("🌍 Çevir"):
-            target_lang = gr.Dropdown(["İngilizce", "Almanca", "Fransızca", "Türkçe"], label="Hedef Dil", value="İngilizce")
-            translate_btn = gr.Button("A Çevir")
-            translate_output = gr.Textbox(label="Çeviri", lines=8)
     # --- BAĞLANTILAR ---
     submit_btn.click(transcribe, inputs=[audio_input], outputs=[output_text, download_file])
-    gemini_btn.click(summarize_with_ai, inputs=[output_text, api_key_input, provider_select, gemini_prompt], outputs=gemini_output)
-    translate_btn.click(translate_with_ai, inputs=[output_text, api_key_input, provider_select, target_lang], outputs=translate_output)
 if __name__ == "__main__":
     demo.launch(share=False)

 from faster_whisper import WhisperModel
 import tempfile
 import time
 import os
+# import requests # Artık gerek yok
+from transformers import pipeline
+import torch
+# ==================== CONFIG & MODELS ====================
+# 1. WHISPER MODEL (Ses Deşifre)
+MODEL_SIZE = "medium"
+model = None
 try:
+    print(f"🔄 Whisper {MODEL_SIZE} modeli yükleniyor...")
+    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
+    print("✅ Whisper Modeli Hazır!")
 except Exception as e:
+    print(f"❌ Whisper Yükleme Hatası: {e}")
     model = None
+# 2. LOCAL AI PIPELINES (Cache)
+summarizer_pipe = None
+translator_pipe = None
+def load_summarizer():
+    global summarizer_pipe
+    if summarizer_pipe is None:
+        print("📥 Özetleme Modeli (mT5-Small) yükleniyor...")
+        device = "cpu" # GPU varsa 0 yapabilirsiniz
+        summarizer_pipe = pipeline("summarization", model="ozcangundes/mt5-small-turkish-summarization", device=-1)
+        print("✅ Özetleme Modeli Hazır!")
+    return summarizer_pipe
+def load_translator():
+    global translator_pipe
+    if translator_pipe is None:
+        print("📥 Çeviri Modeli (NLLB-200) yükleniyor...")
+        # NLLB cpu'da biraz yavaş olabilir ama kalitelidir
+        translator_pipe = pipeline("translation", model="facebook/nllb-200-distilled-600M", device=-1)
+        print("✅ Çeviri Modeli Hazır!")
+    return translator_pipe
+# ==================== AI FUNCTIONS (LOCAL) ====================
+def summarize_locally(text: str, progress=gr.Progress()) -> str:
+    """Yerel model (mT5) ile özetleme."""
     if not text or "⚠️" in text: return "⚠️ Önce geçerli bir metin oluşturun."
     clean_text = text.split("───────────────────────────────────")[0].strip()
+    if len(clean_text) < 50: return "⚠️ Metin özetlemek için çok kısa."
+    try:
+        progress(0.2, desc="Özetleme modeli yükleniyor...")
+        pipe = load_summarizer()
+        progress(0.5, desc="Metin özetleniyor...")
+        # Maksimum girdi uzunluğunu ve çıktı uzunluğunu ayarla
+        result = pipe(clean_text, max_length=150, min_length=40, do_sample=False)
+        return result[0]['summary_text']
+    except Exception as e:
+        return f"❌ Özetleme Hatası: {str(e)}"
+def translate_locally(text: str, target_language: str, progress=gr.Progress()) -> str:
+    """Yerel model (NLLB) ile çeviri."""
+    if not text or "⚠️" in text: return "⚠️ Çevrilecek metin yok."
     clean_text = text.split("───────────────────────────────────")[0].strip()
+    # NLLB Dil Kodları
+    lang_map = {
+        "İngilizce": "eng_Latn",
+        "Almanca": "deu_Latn",
+        "Fransızca": "fra_Latn",
+        "Türkçe": "tur_Latn"
+    }
+    src_lang = "tur_Latn" # Varsayılan giriş Türkçe
+    tgt_lang = lang_map.get(target_language, "eng_Latn")
+    try:
+        progress(0.2, desc="Çeviri modeli yükleniyor...")
+        pipe = load_translator()
+        progress(0.5, desc=f"Çevriliyor ({target_language})...")
+        # NLLB pipeline kullanımı: src_lang ve tgt_lang belirtilmeli
+        result = pipe(clean_text, src_lang=src_lang, tgt_lang=tgt_lang, max_length=512)
+        return result[0]['translation_text']
+    except Exception as e:
+        return f"❌ Çeviri Hatası: {str(e)}"
+# ==================== TRANSCRIPTION (WHISPER) ====================
 def transcribe(audio_path: str, progress=gr.Progress()):
     if model is None:
+        yield "❌ Hata: Whisper modeli yüklenemedi.", None
         return
     if audio_path is None:
         start_time = time.time()
         progress(0, desc="Ses işleniyor...")
         segments, info = model.transcribe(
             audio_path,
             language="tr",
+            beam_size=1,
+            vad_filter=True,
             word_timestamps=False
         )
         duration = info.duration
         full_text = ""
         for segment in segments:
             full_text += segment.text + " "
             if duration > 0:
                 prog = min(segment.end / duration, 0.99)
+                progress(prog, desc=f"Dönüştürülüyor... ({int(segment.end)}/{int(duration)} sn)")
             yield full_text.strip(), None
         elapsed = time.time() - start_time
         final_result = full_text.strip()
         if not final_result:
+            yield "⚠️ Ses anlaşılamadı veya sessiz.", None
             return
+        # Dosya Kaydetme
         progress(0.99, desc="Dosya kaydediliyor...")
+        txt_file = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False, encoding='utf-8')
         txt_file.write(final_result)
         txt_file.close()
         stats = f"\n\n───────────────────────────────────\n📊 İstatistikler\n• Süre: {duration:.1f} sn\n• İşlem: {elapsed:.1f} sn\n• Hız: {duration/elapsed:.1f}x\n───────────────────────────────────"
         yield final_result + stats, txt_file.name
     except Exception as e:
+        yield f"❌ Transkripsiyon Hatası: {str(e)}", None
+# ==================== UI (GRADIO) ====================
+with gr.Blocks(title="Ses Deşifre Pro (Local AI)") as demo:
     gr.HTML("""
         <style>
             footer { display: none !important; }
             .gradio-container { max-width: 900px !important; margin: auto !important; }
         </style>
+        <div style="text-align: center; padding: 30px; background: linear-gradient(135deg, #10b981 0%, #059669 100%); border-radius: 20px; margin-bottom: 20px; color: white;">
+            <h1 style="font-size: 2.2rem; margin: 0;">🎙️ Ses Deşifre & Local AI</h1>
+            <p style="opacity: 0.9;">%100 Çevrimdışı • Token Yok • Limit Yok</p>
         </div>
     """)
     with gr.Row():
         with gr.Column():
+            audio_input = gr.Audio(label="Ses Dosyası", type="filepath", sources=["upload", "microphone"])
+            submit_btn = gr.Button("🚀 Başlat", variant="primary", size="lg")
     with gr.Row():
         with gr.Column():
+            output_text = gr.Textbox(label="Deşifre Metni", placeholder="Sonuçlar burada görünecek...", lines=10, interactive=False)
+            download_file = gr.File(label="Metni İndir (.txt)")
+    # --- LOCAL AI ARAÇLARI ---
+    gr.HTML("<h3 style='margin-top: 20px; border-bottom: 1px solid #ddd; padding-bottom: 10px;'>🧠 Yerel Yapay Zeka (CPU)</h3>")
     with gr.Tabs():
+        with gr.TabItem("✨ Özetle (mT5)"):
+            summary_btn = gr.Button("📝 Metni Özetle")
+            summary_output = gr.Textbox(label="Özet Sonucu", lines=6)
+        with gr.TabItem("🌍 Çevir (NLLB)"):
+             with gr.Row():
+                target_lang = gr.Dropdown(["İngilizce", "Almanca", "Fransızca"], label="Hedef Dil", value="İngilizce")
+                translate_btn = gr.Button("A Çevir")
+             translate_output = gr.Textbox(label="Çeviri Sonucu", lines=6)
     # --- BAĞLANTILAR ---
     submit_btn.click(transcribe, inputs=[audio_input], outputs=[output_text, download_file])
+    summary_btn.click(summarize_locally, inputs=[output_text], outputs=summary_output)
+    translate_btn.click(translate_locally, inputs=[output_text, target_lang], outputs=translate_output)
 if __name__ == "__main__":
     demo.launch(share=False)