Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,682 Bytes
4ff5a32 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import sys
import os
import torch
import torchaudio
sys.path.insert(0, './CosyVoice')
from cosyvoice.cli.cosyvoice import CosyVoice2
from cosyvoice.utils.file_utils import load_wav
class CosyVoice2TTS:
    """Thin wrapper around a CosyVoice2 model for zero-shot speech synthesis."""

    def __init__(self, model_dir, device="cuda"):
        """Load the CosyVoice2 model found at ``model_dir``.

        Args:
            model_dir: Model location, passed unchanged to ``CosyVoice2``.
            device: Accepted for interface compatibility only; this wrapper
                does not forward it to ``CosyVoice2``.
        """
        print(f"[TTS] Loading CosyVoice2 model from {model_dir}...")
        # Initialize the model with the JIT, TensorRT and vLLM loaders
        # switched off and fp16 switched on.
        self.model = CosyVoice2(
            model_dir,
            load_jit=False,
            load_trt=False,
            load_vllm=False,
            fp16=True,
        )
        print("[TTS] CosyVoice2 Model loaded successfully.")

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of ``path`` when it has one."""
        parent = os.path.dirname(path)
        # A bare file name has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, so only create a directory when one is named.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def synthesize(self, text, prompt_text, prompt_speech_path, output_path=None, stream=False):
        """Synthesize ``text`` in the voice of a prompt recording.

        Args:
            text: Text to speak. A falsy value short-circuits the call.
            prompt_text: Passed to the model as ``prompt_text``.
                Presumably the transcript of the prompt recording; verify
                against the CosyVoice2 documentation.
            prompt_speech_path: Path of the prompt audio; it is loaded with
                ``load_wav`` at 16000 Hz.
            output_path: Optional file path. When given, the audio is also
                written there with ``torchaudio.save``.
            stream: Forwarded to ``inference_zero_shot``. All yielded chunks
                are still collected and concatenated before returning.

        Returns:
            ``(sample_rate, audio)`` where ``audio`` is a NumPy array, or
            ``(None, None)`` when ``text`` is empty or the model yields no
            audio.
        """
        if not text:
            return None, None

        # Load the prompt audio.
        prompt_speech_16k = load_wav(prompt_speech_path, 16000)

        # Run zero-shot inference.
        output = self.model.inference_zero_shot(
            tts_text=text,
            prompt_text=prompt_text,
            prompt_speech_16k=prompt_speech_16k,
            stream=stream,
        )

        # Read the sample rate from the model, falling back to 24000 when
        # the model does not expose one.
        sample_rate = getattr(self.model, 'sample_rate', 24000)

        chunks = [item['tts_speech'] for item in output]
        if not chunks:
            return None, None

        # Assumes each chunk is shaped (channels, samples), so dim=1 joins
        # them along time -- TODO confirm against the CosyVoice2 output.
        # Move to the CPU once so saving and the returned array share it.
        full_audio_tensor = torch.cat(chunks, dim=1).cpu()

        if output_path:
            self._ensure_parent_dir(output_path)
            torchaudio.save(output_path, full_audio_tensor, sample_rate)
            print(f"[TTS] Audio saved to {output_path}")

        return sample_rate, full_audio_tensor.numpy()