feat: add sherpa-onnx local TTS provider
Offline VITS TTS using sherpa-onnx, no network dependency. Uses vits-melo-tts-zh_en model for Chinese/English. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
12b4994ac0
commit
e5599d4f43
|
|
@ -0,0 +1,55 @@
|
|||
import io
|
||||
import wave
|
||||
import numpy as np
|
||||
import sherpa_onnx
|
||||
from config.logger import setup_logging
|
||||
from core.providers.tts.base import TTSProviderBase
|
||||
|
||||
# Module name; passed as the `tag` field when binding the logger below.
TAG = __name__
|
||||
# Shared logger object returned by the project's setup_logging() helper.
logger = setup_logging()
|
||||
|
||||
|
||||
class TTSProvider(TTSProviderBase):
    """Offline text-to-speech provider backed by a sherpa-onnx VITS model.

    Config keys read by ``__init__``:
        model_dir: directory expected to contain ``model.onnx``,
            ``lexicon.txt``, ``tokens.txt``, ``dict/`` and the rule FST files
            ``date.fst``, ``phone.fst``, ``number.fst`` and
            ``new_heteronym.fst`` (default ``models/vits-melo-tts-zh_en``).
        speed: value forwarded as ``speed`` to ``OfflineTts.generate``
            (default 1.0; a missing/empty/zero value also falls back to 1.0).
        sid: value forwarded as ``sid`` (speaker id) to
            ``OfflineTts.generate`` (default 0).
    """

    # Rule FST files applied by sherpa-onnx, in the order they are listed.
    _RULE_FST_NAMES = ("date.fst", "phone.fst", "number.fst", "new_heteronym.fst")

    def __init__(self, config, delete_audio_file):
        """Build the sherpa-onnx engine described by *config*.

        Args:
            config: mapping with the keys described on the class.
            delete_audio_file: forwarded unchanged to the base class.

        Raises:
            ValueError: if sherpa-onnx reports the assembled configuration
                as invalid (for example because model files are missing).
        """
        super().__init__(config, delete_audio_file)

        # Treat a present-but-empty value the same as a missing key.
        model_dir = config.get("model_dir") or "models/vits-melo-tts-zh_en"

        speed = config.get("speed", 1.0)
        self.speed = float(speed) if speed else 1.0

        # Mirror the tolerant handling used for `speed`: a null/empty `sid`
        # falls back to speaker 0 instead of raising TypeError in int().
        sid = config.get("sid", 0)
        self.sid = int(sid) if sid else 0

        rule_fsts = ",".join(
            f"{model_dir}/{name}" for name in self._RULE_FST_NAMES
        )

        tts_config = sherpa_onnx.OfflineTtsConfig(
            model=sherpa_onnx.OfflineTtsModelConfig(
                vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                    model=f"{model_dir}/model.onnx",
                    lexicon=f"{model_dir}/lexicon.txt",
                    tokens=f"{model_dir}/tokens.txt",
                    dict_dir=f"{model_dir}/dict",
                ),
            ),
            rule_fsts=rule_fsts,
            max_num_sentences=1,
        )

        # Validate first so a bad model_dir surfaces as a Python exception
        # rather than being handed to the native constructor.
        if not tts_config.validate():
            raise ValueError(
                f"Invalid sherpa-onnx TTS config, check the files under "
                f"model_dir={model_dir}"
            )

        self.tts = sherpa_onnx.OfflineTts(tts_config)
        self.sample_rate = self.tts.sample_rate
        logger.bind(tag=TAG).info(
            f"SherpaOnnxTTS 初始化完成: model_dir={model_dir}, sample_rate={self.sample_rate}, sid={self.sid}"
        )

    @staticmethod
    def _to_wav_bytes(samples, sample_rate):
        """Encode float samples as a mono, 16-bit PCM WAV byte string."""
        # Clip before scaling: values outside [-1, 1] would otherwise wrap
        # around when cast to int16 and produce loud clicks.
        clipped = np.clip(samples, -1.0, 1.0)
        # WAV PCM data is little-endian; force it regardless of host order.
        pcm = (clipped * 32767).astype("<i2")

        buffer = io.BytesIO()
        with wave.open(buffer, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(sample_rate)
            wf.writeframes(pcm.tobytes())
        return buffer.getvalue()

    async def text_to_speak(self, text, output_file):
        """Synthesize *text* into WAV audio.

        Args:
            text: text to synthesize.
            output_file: path to write the WAV data to. When falsy, nothing
                is written and the WAV bytes are returned instead.

        Returns:
            The WAV bytes when *output_file* is falsy, otherwise ``None``.
        """
        # NOTE(review): generate() is called synchronously, so the running
        # event loop is blocked until synthesis finishes.
        audio = self.tts.generate(text, sid=self.sid, speed=self.speed)
        samples = np.asarray(audio.samples, dtype=np.float32)

        if samples.size == 0:
            # Keep the original best-effort behaviour (an empty WAV is still
            # produced) but make the condition visible in the logs.
            logger.bind(tag=TAG).warning(
                f"SherpaOnnxTTS produced no audio samples for text: {text!r}"
            )

        wav_data = self._to_wav_bytes(samples, self.sample_rate)

        if output_file:
            with open(output_file, "wb") as f:
                f.write(wav_data)
            return None
        return wav_data
|
||||
Loading…
Reference in New Issue