fix: resample OpenAI TTS output from 24kHz to 16kHz WAV

OpenAI TTS returns 24kHz audio, which Android MediaPlayer can't play via FlutterSound's pcm16WAV codec. Request raw PCM and resample it to 16kHz before wrapping it in a WAV header, matching the local TTS format.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 05:38:39 -08:00 · 2026-02-25 05:38:39 -08:00 · 5be7f9c078
parent 4456550393
commit 5be7f9c078
1 changed file with 10 additions and 3 deletions
--- a/packages/services/voice-service/src/api/test_tts.py
+++ b/packages/services/voice-service/src/api/test_tts.py
@@ -366,7 +366,7 @@ async def tts_synthesize_openai(
    model: str = Query("tts-1", regex="^(tts-1|tts-1-hd|gpt-4o-mini-tts)$"),
    voice: str = Query("alloy", regex="^(alloy|ash|ballad|coral|echo|fable|nova|onyx|sage|shimmer)$"),
 ):
-    """Synthesize text to audio via OpenAI TTS API."""
+    """Synthesize text to audio via OpenAI TTS API, resampled to 16kHz WAV."""
    client = _get_openai_client()
    if client is None:
        return Response(content="OPENAI_API_KEY not configured", status_code=503)
@@ -377,9 +377,16 @@ async def tts_synthesize_openai(
            model=model,
            voice=voice,
            input=text,
-            response_format="wav",
+            response_format="pcm",  # raw 24kHz 16-bit mono PCM (no header)
        )
-        return response.content
+        raw_pcm = response.content
+        # Resample 24kHz → 16kHz to match Flutter player expectations
+        audio_np = np.frombuffer(raw_pcm, dtype=np.int16).astype(np.float32)
+        target_len = int(len(audio_np) / 24000 * 16000)
+        indices = np.linspace(0, len(audio_np) - 1, target_len)
+        resampled = np.interp(indices, np.arange(len(audio_np)), audio_np)
+        pcm_16k = resampled.clip(-32768, 32767).astype(np.int16).tobytes()
+        return _make_wav(pcm_16k, 16000)

    try:
        wav_bytes = await loop.run_in_executor(None, _synth)