From 5be7f9c078a19d5956bae48b03fc9464f6116f05 Mon Sep 17 00:00:00 2001 From: hailin Date: Wed, 25 Feb 2026 05:38:39 -0800 Subject: [PATCH] fix: resample OpenAI TTS output from 24kHz to 16kHz WAV OpenAI TTS returns 24kHz audio which Android MediaPlayer can't play via FlutterSound's pcm16WAV codec. Request raw PCM and resample to 16kHz before wrapping in WAV header, matching the local TTS format. Co-Authored-By: Claude Opus 4.6 --- packages/services/voice-service/src/api/test_tts.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/packages/services/voice-service/src/api/test_tts.py b/packages/services/voice-service/src/api/test_tts.py index bcfa230..7be1974 100644 --- a/packages/services/voice-service/src/api/test_tts.py +++ b/packages/services/voice-service/src/api/test_tts.py @@ -366,7 +366,7 @@ async def tts_synthesize_openai( model: str = Query("tts-1", regex="^(tts-1|tts-1-hd|gpt-4o-mini-tts)$"), voice: str = Query("alloy", regex="^(alloy|ash|ballad|coral|echo|fable|nova|onyx|sage|shimmer)$"), ): - """Synthesize text to audio via OpenAI TTS API.""" + """Synthesize text to audio via OpenAI TTS API, resampled to 16kHz WAV.""" client = _get_openai_client() if client is None: return Response(content="OPENAI_API_KEY not configured", status_code=503) @@ -377,9 +377,16 @@ async def tts_synthesize_openai( model=model, voice=voice, input=text, - response_format="wav", + response_format="pcm", # raw 24kHz 16-bit mono PCM (no header) ) - return response.content + raw_pcm = response.content + # Resample 24kHz → 16kHz to match Flutter player expectations + audio_np = np.frombuffer(raw_pcm, dtype=np.int16).astype(np.float32) + target_len = int(len(audio_np) / 24000 * 16000) + indices = np.linspace(0, len(audio_np) - 1, target_len) + resampled = np.interp(indices, np.arange(len(audio_np)), audio_np) + pcm_16k = resampled.clip(-32768, 32767).astype(np.int16).tobytes() + return _make_wav(pcm_16k, 16000) try: wav_bytes = await loop.run_in_executor(None, _synth)