def _generate_wav(self, text):
    """Synthesize *text* and return it as 24 kHz mono 16-bit PCM WAV bytes.

    This is a synchronous call intended to be run in a thread pool.

    Reads ``self.tts``, ``self.sid``, ``self.speed`` and ``self.sample_rate``
    (the sample rate of the audio produced by the model).

    Args:
        text: The text to synthesize.

    Returns:
        bytes: A complete WAV file (header + frames) at 24000 Hz.
    """
    audio = self.tts.generate(text, sid=self.sid, speed=self.speed)
    samples = np.array(audio.samples, dtype=np.float32)

    # Resample to the target rate (the device requires 24000 Hz, while the
    # model outputs 44100 Hz).
    target_sr = 24000
    source_sr = int(self.sample_rate)  # math.gcd only accepts integers
    if source_sr != target_sr and samples.size:
        # Imported lazily so the scipy import cost is only paid when
        # resampling is actually required.
        from math import gcd

        from scipy.signal import resample_poly

        # Reduce the up/down factors so the polyphase filter stays small.
        g = gcd(source_sr, target_sr)
        samples = resample_poly(samples, target_sr // g, source_sr // g)

    # Resampling can overshoot beyond [-1, 1] (and the synthesizer may emit
    # out-of-range samples too); without clipping those values wrap around
    # in int16 and produce loud clicks.
    samples = np.clip(samples, -1.0, 1.0)
    # WAV PCM data is little-endian regardless of the host byte order.
    pcm = (samples * 32767).astype("<i2")

    wav_io = io.BytesIO()
    with wave.open(wav_io, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(target_sr)
        wf.writeframes(pcm.tobytes())
    return wav_io.getvalue()