fix: resample TTS audio from 44100Hz to 24000Hz for device compatibility
The model outputs audio at 44100 Hz, but the device expects 24000 Hz via Opus. Without resampling, the audio plays at the wrong speed, causing 29 s delays between segments. Verified: synthesis + resampling takes 0.38 s for 1.6 s of audio. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9b2b875c2b
commit
5679622996
|
|
@ -47,15 +47,25 @@ class TTSProvider(TTSProviderBase):
|
|||
|
||||
def _generate_wav(self, text: str, target_sr: int = 24000) -> bytes:
    """Synthesize ``text`` synchronously and return it as WAV bytes.

    This is a blocking call and is meant to be invoked from a thread pool.

    The raw samples produced by ``self.tts.generate`` are treated as being
    at ``self.sample_rate``. When that differs from ``target_sr`` they are
    resampled so that the WAV header and the sample data agree.

    Args:
        text: Text to synthesize.
        target_sr: Sample rate in Hz of the returned WAV (default 24000,
            the rate the playback device expects).

    Returns:
        The contents of a mono, 16-bit PCM WAV file at ``target_sr``.
    """
    from math import gcd

    audio = self.tts.generate(text, sid=self.sid, speed=self.speed)
    samples = np.asarray(audio.samples, dtype=np.float32)

    # Resample to the target rate. Skipped for empty output, where there is
    # nothing to filter and the resampler has no meaningful result.
    if samples.size and self.sample_rate != target_sr:
        from scipy.signal import resample_poly

        g = gcd(self.sample_rate, target_sr)
        samples = resample_poly(samples, target_sr // g, self.sample_rate // g)

    # The anti-aliasing filter can overshoot [-1, 1]; clip before scaling so
    # that out-of-range values saturate instead of wrapping around in int16.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

    wav_io = io.BytesIO()
    with wave.open(wav_io, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        # The header must advertise the rate of the (resampled) data.
        wf.setframerate(target_sr)
        wf.writeframes(pcm.tobytes())
    return wav_io.getvalue()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue