fix: resample OpenAI TTS output from 24kHz to 16kHz WAV

OpenAI TTS returns 24kHz audio, which Android MediaPlayer can't play
via FlutterSound's pcm16WAV codec. Request raw PCM and resample it to
16kHz before wrapping it in a WAV header, matching the local TTS format.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-25 05:38:39 -08:00
parent 4456550393
commit 5be7f9c078
1 changed file with 10 additions and 3 deletions

View File

@ -366,7 +366,7 @@ async def tts_synthesize_openai(
model: str = Query("tts-1", regex="^(tts-1|tts-1-hd|gpt-4o-mini-tts)$"),
voice: str = Query("alloy", regex="^(alloy|ash|ballad|coral|echo|fable|nova|onyx|sage|shimmer)$"),
):
"""Synthesize text to audio via OpenAI TTS API."""
"""Synthesize text to audio via OpenAI TTS API, resampled to 16kHz WAV."""
client = _get_openai_client()
if client is None:
return Response(content="OPENAI_API_KEY not configured", status_code=503)
@ -377,9 +377,16 @@ async def tts_synthesize_openai(
model=model,
voice=voice,
input=text,
response_format="wav",
response_format="pcm", # raw 24kHz 16-bit mono PCM (no header)
)
return response.content
raw_pcm = response.content
# Resample 24kHz → 16kHz to match Flutter player expectations
audio_np = np.frombuffer(raw_pcm, dtype=np.int16).astype(np.float32)
target_len = int(len(audio_np) / 24000 * 16000)
indices = np.linspace(0, len(audio_np) - 1, target_len)
resampled = np.interp(indices, np.arange(len(audio_np)), audio_np)
pcm_16k = resampled.clip(-32768, 32767).astype(np.int16).tobytes()
return _make_wav(pcm_16k, 16000)
try:
wav_bytes = await loop.run_in_executor(None, _synth)