Spaces:
Running
on
Zero
Running
on
Zero
| import sys | |
| import os | |
| import torch | |
| import torchaudio | |
| sys.path.insert(0, './CosyVoice') | |
| from cosyvoice.cli.cosyvoice import CosyVoice2 | |
| from cosyvoice.utils.file_utils import load_wav | |
class CosyVoice2TTS:
    """Thin wrapper around a CosyVoice2 model for zero-shot speech synthesis."""

    # Sample rate reported when the loaded model exposes no `sample_rate`.
    DEFAULT_SAMPLE_RATE = 24000

    def __init__(self, model_dir, device="cuda"):
        """Load the CosyVoice2 model stored in ``model_dir``.

        Args:
            model_dir: Model directory, passed unchanged to ``CosyVoice2``.
            device: Accepted for caller compatibility. NOTE(review): this
                constructor never forwards it to the model -- confirm whether
                device selection is meant to happen elsewhere.
        """
        print(f"[TTS] Loading CosyVoice2 model from {model_dir}...")
        # Initialize the model with the JIT, TensorRT and vLLM loaders
        # switched off and fp16 switched on.
        self.model = CosyVoice2(
            model_dir,
            load_jit=False,
            load_trt=False,
            load_vllm=False,
            fp16=True,
        )
        print("[TTS] CosyVoice2 Model loaded successfully.")

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of ``path`` when it names one."""
        parent = os.path.dirname(path)
        # A bare filename such as "out.wav" has an empty dirname, and
        # os.makedirs("") raises FileNotFoundError, so skip it in that case.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def synthesize(self, text, prompt_text, prompt_speech_path, output_path=None, stream=False):
        """Synthesize ``text`` in the voice of a reference recording.

        Args:
            text: Text to speak. An empty or ``None`` value short-circuits.
            prompt_text: Passed to the model as ``prompt_text``; presumably
                the transcript of the reference recording -- TODO confirm.
            prompt_speech_path: Path of the reference recording, loaded
                through ``load_wav`` with a target rate of 16000.
            output_path: Optional file path; when given, the audio is also
                written there with ``torchaudio.save``.
            stream: Forwarded to ``inference_zero_shot``. Every yielded chunk
                is still collected and joined before returning.

        Returns:
            ``(sample_rate, audio)`` where ``audio`` is a NumPy array built
            by concatenating the generated chunks along dimension 1, or
            ``(None, None)`` when ``text`` is empty or nothing was generated.
        """
        if not text:
            return None, None

        # Load the reference audio.
        prompt_speech_16k = load_wav(prompt_speech_path, 16000)

        # Run zero-shot inference.
        output = self.model.inference_zero_shot(
            tts_text=text,
            prompt_text=prompt_text,
            prompt_speech_16k=prompt_speech_16k,
            stream=stream,
        )

        # Read the sample rate from the model, falling back to the default.
        sample_rate = getattr(self.model, 'sample_rate', self.DEFAULT_SAMPLE_RATE)

        final_audio = [chunk['tts_speech'] for chunk in output]
        if not final_audio:
            return None, None

        # Move to the CPU once: the NumPy conversion needs a CPU tensor, and
        # saving from the CPU avoids depending on where the model left it.
        full_audio_tensor = torch.cat(final_audio, dim=1).cpu()

        if output_path:
            self._ensure_parent_dir(output_path)
            torchaudio.save(output_path, full_audio_tensor, sample_rate)
            print(f"[TTS] Audio saved to {output_path}")

        return sample_rate, full_audio_tensor.numpy()