Spaces:

Jedi09
/

voice_to_text_gemini

Running

App Files Files Community

voice_to_text_gemini / app.py

Jedi09

Update app.py

81d3c4d verified 5 days ago

raw

history blame contribute delete

10.2 kB

	import gradio as gr
	from faster_whisper import WhisperModel
	import time
	import os
	import tempfile
	import requests
	import traceback

	# ==================== CONFIG & MODELS ====================

	# 1. WHISPER MODEL (speech transcription, runs locally on CPU)
	MODEL_SIZE = "medium"


	def _load_whisper_model():
	    """Build the int8 CPU Whisper model; return None when loading fails."""
	    try:
	        print(f"📥 Whisper {MODEL_SIZE} modeli yükleniyor...")
	        loaded = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
	        print("✅ Whisper Modeli Hazır!")
	        return loaded
	    except Exception as load_error:
	        # Any failure is reported on stdout and leaves the module importable
	        # with no model, so code using `model` must check for None.
	        print(f"❌ Whisper Yükleme Hatası: {load_error}")
	        return None


	# Module-level handle: a WhisperModel instance, or None if loading failed.
	model = _load_whisper_model()

	# ==================== AI API FUNCTIONS (Hugging Face Router) ====================

	def call_huggingface_api(prompt, api_key):
	    """Send ``prompt`` to the Hugging Face router and return the reply text.

	    The OpenAI-compatible chat/completions endpoint is called with a fixed
	    list of candidate models, in order. The first HTTP 200 response decides
	    the result; other statuses (except 401) move on to the next model.

	    Args:
	        prompt: User message to send.
	        api_key: Token typed into the UI. Only used when the ``HF_TOKEN``
	            environment variable is unset or blank.

	    Returns:
	        The stripped assistant reply, or a user-facing message starting with
	        "⚠️" or "❌" explaining why no reply was obtained. Request failures
	        are logged to stdout and never raised to the caller.
	    """
	    # The environment secret wins over the UI value. Both are stripped so that
	    # a stray space or newline is neither mistaken for a token nor placed into
	    # the Authorization header.
	    env_token = (os.environ.get("HF_TOKEN") or "").strip()
	    token = env_token or (api_key or "").strip()

	    if not token:
	        return "⚠️ HF Token bulunamadı. Secret olarak veya kutuya girin."
	    if not token.startswith("hf_"):
	        return "⚠️ Token 'hf_' ile başlamalıdır."

	    # Models to try, in order.
	    models = [
	        "Qwen/Qwen2.5-72B-Instruct",
	        "meta-llama/Llama-3.3-70B-Instruct",
	        "mistralai/Mistral-7B-Instruct-v0.3",
	        "microsoft/Phi-3-mini-4k-instruct",
	    ]

	    url = "https://router.huggingface.co/v1/chat/completions"

	    print("=" * 50)
	    print(f"🔗 API URL: {url}")
	    secret_source = "ENV" if env_token else "UI"
	    # Only the origin of the token is logged; no part of the secret itself.
	    print(f"🔑 Token ({secret_source})")
	    print("=" * 50)

	    headers = {
	        "Authorization": f"Bearer {token}",
	        "Content-Type": "application/json",
	    }

	    for model_id in models:
	        payload = {
	            "model": model_id,
	            "messages": [
	                {"role": "system", "content": "Sen yardımsever bir Türkçe asistansın."},
	                {"role": "user", "content": prompt},
	            ],
	            "max_tokens": 512,
	            "temperature": 0.3,
	        }

	        try:
	            print(f"\n📡 [{model_id}] İstek gönderiliyor...")
	            response = requests.post(url, headers=headers, json=payload, timeout=90)
	            status = response.status_code

	            print(f"📥 [{model_id}] HTTP Status: {status}")

	            if status == 200:
	                result = response.json()
	                print(f"✅ [{model_id}] BAŞARILI!")
	                if "choices" in result and result["choices"]:
	                    return result["choices"][0]["message"]["content"].strip()
	                return f"❌ Beklenmedik yanıt: {result}"

	            if status == 401:
	                # A rejected token will fail for every model, so stop here.
	                print(f"❌ [{model_id}] YETKİSİZ! Token kontrol edin.")
	                print(f" Yanıt: {response.text[:300]}")
	                return "❌ Token geçersiz. Lütfen 'Fine-grained' token oluşturup 'Inference' yetkisi verin."

	            # Every remaining status is logged and the next model is tried.
	            if status in (503, 529):
	                print(f"⚠️ [{model_id}] Model meşgul/yükleniyor...")
	            elif status == 404:
	                print(f"⚠️ [{model_id}] Model bulunamadı")
	            elif status == 422:
	                print(f"⚠️ [{model_id}] Format hatası: {response.text[:200]}")
	            else:
	                print(f"❌ [{model_id}] HATA ({status}): {response.text[:300]}")

	        except requests.exceptions.Timeout:
	            print(f"⏰ [{model_id}] Zaman aşımı (90sn)")
	        except Exception as e:
	            # Deliberate best-effort: any other failure (connection error,
	            # malformed JSON, unexpected response shape) is logged in full and
	            # the loop falls through to the next candidate model.
	            print(f"💥 [{model_id}] İSTİSNA DETAYI:")
	            print(f" Hata Tipi: {type(e).__name__}")
	            print(f" Mesaj: {e}")
	            print(" Traceback:")
	            traceback.print_exc()

	    print("\n" + "=" * 50)
	    print("❌ TÜM MODELLER BAŞARISIZ!")
	    print("=" * 50)
	    return "❌ Tüm modeller başarısız. Token'ınızı kontrol edin veya daha sonra deneyin."

	def summarize_with_api(text: str, api_key: str) -> str:
	    """Ask the LLM for a bulleted Turkish summary of a transcript.

	    Args:
	        text: Content of the transcript box. Anything from the statistics
	            separator line onwards is discarded before prompting.
	        api_key: Hugging Face token forwarded to ``call_huggingface_api``.

	    Returns:
	        The reply from ``call_huggingface_api``, or a "⚠️" notice when there
	        is no transcript to summarize.
	    """
	    nothing_to_do = "⚠️ Özetlenecek metin yok."

	    # The transcript box can also hold status messages: "⚠️" warnings and
	    # "❌" errors. Neither is a transcript, so neither is sent to the model.
	    if not text or any(marker in text for marker in ("⚠️", "❌")):
	        return nothing_to_do

	    clean_text = text.split("───────────────────────────────────")[0].strip()
	    if not clean_text:
	        # Only whitespace precedes the statistics block.
	        return nothing_to_do

	    prompt = f"Aşağıdaki metni Türkçe olarak maddeler halinde özetle:\n\n{clean_text}"

	    return call_huggingface_api(prompt, api_key)

	def translate_with_api(text: str, target_language: str, api_key: str) -> str:
	    """Ask the LLM to translate a transcript into the selected language.

	    Args:
	        text: Content of the transcript box. Anything from the statistics
	            separator line onwards is discarded before prompting.
	        target_language: Turkish display name of the target language;
	            names missing from the mapping fall back to English.
	        api_key: Hugging Face token forwarded to ``call_huggingface_api``.

	    Returns:
	        The reply from ``call_huggingface_api``, or a "⚠️" notice when there
	        is no transcript to translate.
	    """
	    nothing_to_do = "⚠️ Çevrilecek metin yok."

	    # The transcript box can also hold status messages: "⚠️" warnings and
	    # "❌" errors. Neither is a transcript, so neither is sent to the model.
	    if not text or any(marker in text for marker in ("⚠️", "❌")):
	        return nothing_to_do

	    clean_text = text.split("───────────────────────────────────")[0].strip()
	    if not clean_text:
	        # Only whitespace precedes the statistics block.
	        return nothing_to_do

	    lang_map = {"İngilizce": "English", "Almanca": "German", "Fransızca": "French", "Türkçe": "Turkish"}
	    tgt = lang_map.get(target_language, "English")

	    prompt = f"Translate the following text to {tgt}. Only provide the translation, no extra text.\n\nText:\n{clean_text}"

	    return call_huggingface_api(prompt, api_key)


	# ==================== TRANSCRIPTION (WHISPER - LOCAL) ====================

	def transcribe(audio_path: str, progress=gr.Progress()):
	    """Transcribe an audio file with the module-level Whisper model.

	    This is a generator. It yields ``(text, file_path)`` pairs: the
	    transcript accumulated so far with ``None`` after each segment, then one
	    final pair holding the transcript plus a statistics footer and the path
	    of a ``.txt`` file containing the plain transcript.

	    Args:
	        audio_path: Path of the audio file, or ``None`` when nothing was
	            provided.
	        progress: Gradio progress tracker, called with a fraction and a
	            description.

	    Yields:
	        ``(message_or_transcript, txt_path_or_None)``. Every failure is
	        reported as a "❌"/"⚠️" message with ``None`` instead of raising.
	    """
	    if model is None:
	        yield "❌ Hata: Whisper modeli yüklenemedi.", None
	        return

	    if audio_path is None:
	        yield "⚠️ Lütfen bir ses dosyası yükleyin.", None
	        return

	    try:
	        start_time = time.time()
	        progress(0, desc="Ses işleniyor...")

	        segments, info = model.transcribe(
	            audio_path,
	            language="tr",
	            beam_size=1,
	            vad_filter=True,
	            word_timestamps=False,
	        )

	        duration = info.duration
	        full_text = ""

	        for segment in segments:
	            full_text += segment.text + " "
	            if duration > 0:
	                # Capped below 1.0 so the bar does not complete before the
	                # output file has been written.
	                prog = min(segment.end / duration, 0.99)
	                progress(prog, desc=f"Dönüştürülüyor... ({int(segment.end)}/{int(duration)} sn)")
	            yield full_text.strip(), None

	        elapsed = time.time() - start_time
	        final_result = full_text.strip()

	        if not final_result:
	            yield "⚠️ Ses anlaşılamadı veya sessiz.", None
	            return

	        progress(0.99, desc="Dosya kaydediliyor...")
	        # delete=False keeps the file on disk for the download component; the
	        # context manager closes the handle even if the write fails.
	        with tempfile.NamedTemporaryFile(
	            mode='w', suffix='.txt', delete=False, encoding='utf-8'
	        ) as txt_file:
	            txt_file.write(final_result)

	        # A zero elapsed time (coarse clock) must not turn a finished
	        # transcript into a division error.
	        speed = duration / elapsed if elapsed > 0 else 0.0

	        stats = f"\n\n───────────────────────────────────\n📊 İstatistikler\n• Süre: {duration:.1f} sn\n• İşlem: {elapsed:.1f} sn\n• Hız: {speed:.1f}x\n───────────────────────────────────"

	        yield final_result + stats, txt_file.name

	    except Exception as e:
	        # UI boundary: show the failure in the text box instead of raising.
	        yield f"❌ Transkripsiyon Hatası: {str(e)}", None

	# ==================== UI (GRADIO) ====================

	# Build the Gradio interface: upload/record audio, show the transcript with a
	# downloadable .txt file, then summarize or translate it through the
	# Hugging Face API.
	# NOTE(review): the nesting indentation of the `with` contexts below is
	# missing in this copy of the file; restore it before running.
	with gr.Blocks(title="Voice to Text Manager") as demo:

	# Inline CSS (hides the footer, caps the container width) plus the banner.
	gr.HTML("""
	<style>
	footer { display: none !important; }
	.gradio-container { max-width: 900px !important; margin: auto !important; }
	</style>
	<div style="text-align: center; padding: 30px; background: linear-gradient(135deg, #6366f1 0%, #a855f7 100%); border-radius: 20px; margin-bottom: 20px; color: white;">
	<h1 style="font-size: 2.2rem; margin: 0;">🎙️ Voice to Text Manager</h1>
	<p style="opacity: 0.9; font-size: 1.1rem;">1-2 saatlik ses kayıtlarını rahatça deşifre edebilir ve işleyebilirsiniz.</p>
	</div>
	""")

	# Input: audio from upload or microphone, passed to the handler as a file path.
	with gr.Row():
	with gr.Column():
	audio_input = gr.Audio(label="Ses Dosyası", type="filepath", sources=["upload", "microphone"])
	submit_btn = gr.Button("🚀 Deşifre Et", variant="primary", size="lg")

	# Output: read-only transcript box and the downloadable text file.
	with gr.Row():
	with gr.Column():
	output_text = gr.Textbox(label="Metin", placeholder="Sonuçlar burada...", lines=10, interactive=False)
	download_file = gr.File(label="İndir (.txt)")

	# Section heading for the Hugging Face API features (summary and translation).
	gr.HTML("<h3 style='margin-top: 20px; border-bottom: 1px solid #ddd; padding-bottom: 10px;'>☁️ Hugging Face API (Özet & Çeviri)</h3>")

	# Check whether the HF_TOKEN secret is set; the result is shown as the
	# placeholder text of the token box.
	hf_secret_loaded = bool(os.environ.get("HF_TOKEN"))
	secret_status = "✅ Secret yüklendi (HF_TOKEN)" if hf_secret_loaded else "⚠️ Secret bulunamadı, token girin"

	# Password-style box for a token typed by the user.
	with gr.Row():
	api_key_input = gr.Textbox(
	label="🔑 HF Token (Opsiyonel - Kendi Tokeninizi Ekleyebilirsiniz)",
	placeholder=secret_status,
	type="password",
	value="" if hf_secret_loaded else None
	)

	# One tab per API action: summarize, translate.
	with gr.Tabs():
	with gr.TabItem("✨ Özetle"):
	summary_btn = gr.Button("📝 Özetle")
	summary_output = gr.Textbox(label="Özet", lines=6)

	with gr.TabItem("🌍 Çevir"):
	with gr.Row():
	target_lang = gr.Dropdown(["İngilizce", "Almanca", "Fransızca"], label="Hedef Dil", value="İngilizce")
	translate_btn = gr.Button("Çevir")
	translate_output = gr.Textbox(label="Çeviri", lines=6)

	# Event wiring: each button calls its handler with the listed inputs and
	# writes the result(s) to the listed outputs.
	submit_btn.click(transcribe, inputs=[audio_input], outputs=[output_text, download_file])
	summary_btn.click(summarize_with_api, inputs=[output_text, api_key_input], outputs=summary_output)
	translate_btn.click(translate_with_api, inputs=[output_text, target_lang, api_key_input], outputs=translate_output)

	# Start the app only when the file is run as a script; no public share link.
	if __name__ == "__main__":
	demo.launch(share=False)