diff --git a/packages/services/voice-service/src/api/test_tts.py b/packages/services/voice-service/src/api/test_tts.py index bcfa230..7be1974 100644 --- a/packages/services/voice-service/src/api/test_tts.py +++ b/packages/services/voice-service/src/api/test_tts.py @@ -366,7 +366,7 @@ async def tts_synthesize_openai( model: str = Query("tts-1", regex="^(tts-1|tts-1-hd|gpt-4o-mini-tts)$"), voice: str = Query("alloy", regex="^(alloy|ash|ballad|coral|echo|fable|nova|onyx|sage|shimmer)$"), ): - """Synthesize text to audio via OpenAI TTS API.""" + """Synthesize text to audio via OpenAI TTS API, resampled to 16kHz WAV.""" client = _get_openai_client() if client is None: return Response(content="OPENAI_API_KEY not configured", status_code=503) @@ -377,9 +377,16 @@ async def tts_synthesize_openai( model=model, voice=voice, input=text, - response_format="wav", + response_format="pcm", # raw 24kHz 16-bit mono PCM (no header) ) - return response.content + raw_pcm = response.content + # Resample 24kHz → 16kHz to match Flutter player expectations + audio_np = np.frombuffer(raw_pcm, dtype=np.int16).astype(np.float32) + target_len = int(len(audio_np) / 24000 * 16000) + indices = np.linspace(0, len(audio_np) - 1, target_len) + resampled = np.interp(indices, np.arange(len(audio_np)), audio_np) + pcm_16k = resampled.clip(-32768, 32767).astype(np.int16).tobytes() + return _make_wav(pcm_16k, 16000) try: wav_bytes = await loop.run_in_executor(None, _synth)