Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import numpy as np | |
| import torch | |
| from fastapi import FastAPI, File, UploadFile | |
| from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline | |
# Directory where downloaded model and processor files should be stored.
CACHE_DIR = "/app/cache"

# transformers resolves its cache location when it is first imported, and that
# import has already run by the time this line executes. The assignment is kept
# for anything that reads the variable later, but on its own it does not
# redirect downloads -- hence the explicit ``cache_dir`` arguments below.
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR

app = FastAPI(
    title="Whisper API",
    redirect_slashes=False,
)

# Device configuration: first CUDA device with half precision when CUDA is
# available, otherwise CPU with single precision.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the Whisper model and its processor (tokenizer + feature extractor).
model_id = "openai/whisper-large-v3-turbo"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    cache_dir=CACHE_DIR,
).to(device)
processor = AutoProcessor.from_pretrained(model_id, cache_dir=CACHE_DIR)

# Speech-recognition pipeline built from the objects loaded above; it is
# created once at import time and used by the request handlers.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
async def root():
    """Return the static welcome payload for the API.

    NOTE(review): no route decorator is visible in this view of the file;
    confirm how this coroutine is registered on ``app``.
    """
    greeting = "Welcome to Whisper API!"
    return dict(message=greeting)
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the module-level ``pipe``.

    The upload is copied into a temporary file with a ``.wav`` suffix, and the
    path of that file is handed to ``pipe`` with ``return_timestamps="word"``.

    Returns:
        ``{"transcription": <the "chunks" entry of the pipeline result>}`` on
        success, or ``{"error": <exception message>}`` if anything in the
        process raises.

    NOTE(review): no route decorator is visible in this view of the file;
    confirm how this coroutine is registered on ``app``.
    """
    try:
        # The temporary file is removed automatically when the ``with`` block
        # exits, so the pipeline must run while the block is still open.
        with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as scratch:
            uploaded_bytes = await file.read()
            scratch.write(uploaded_bytes)
            # Push buffered bytes to disk before the path is read by name.
            scratch.flush()

            audio_path = scratch.name
            asr_output = pipe(audio_path, return_timestamps="word")

        return {"transcription": asr_output["chunks"]}
    except Exception as exc:
        # Failures are reported in the response body rather than re-raised.
        return {"error": str(exc)}