From 5be7f9c078a19d5956bae48b03fc9464f6116f05 Mon Sep 17 00:00:00 2001
From: hailin <hailin.zhao@gdzx.xyz>
Date: Wed, 25 Feb 2026 05:38:39 -0800
Subject: [PATCH] fix: resample OpenAI TTS output from 24kHz to 16kHz WAV

OpenAI TTS returns 24kHz audio, which Android MediaPlayer can't play
via FlutterSound's pcm16WAV codec. Request raw PCM and resample it to
16kHz before wrapping it in a WAV header, matching the local TTS format.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 packages/services/voice-service/src/api/test_tts.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/packages/services/voice-service/src/api/test_tts.py b/packages/services/voice-service/src/api/test_tts.py
index bcfa230..7be1974 100644
--- a/packages/services/voice-service/src/api/test_tts.py
+++ b/packages/services/voice-service/src/api/test_tts.py
@@ -366,7 +366,7 @@ async def tts_synthesize_openai(
     model: str = Query("tts-1", regex="^(tts-1|tts-1-hd|gpt-4o-mini-tts)$"),
     voice: str = Query("alloy", regex="^(alloy|ash|ballad|coral|echo|fable|nova|onyx|sage|shimmer)$"),
 ):
-    """Synthesize text to audio via OpenAI TTS API."""
+    """Synthesize text to audio via OpenAI TTS API, resampled to 16kHz WAV."""
     client = _get_openai_client()
     if client is None:
         return Response(content="OPENAI_API_KEY not configured", status_code=503)
@@ -377,9 +377,16 @@ async def tts_synthesize_openai(
             model=model,
             voice=voice,
             input=text,
-            response_format="wav",
+            response_format="pcm",  # raw 24kHz 16-bit mono PCM (no header)
         )
-        return response.content
+        raw_pcm = response.content
+        # Resample 24kHz → 16kHz to match Flutter player expectations
+        audio_np = np.frombuffer(raw_pcm, dtype=np.int16).astype(np.float32)
+        target_len = int(len(audio_np) / 24000 * 16000)
+        indices = np.linspace(0, len(audio_np) - 1, target_len)
+        resampled = np.interp(indices, np.arange(len(audio_np)), audio_np)
+        pcm_16k = resampled.clip(-32768, 32767).astype(np.int16).tobytes()
+        return _make_wav(pcm_16k, 16000)
 
     try:
         wav_bytes = await loop.run_in_executor(None, _synth)