fix: resample OpenAI TTS output from 24kHz to 16kHz WAV
OpenAI TTS returns 24kHz audio, which Android MediaPlayer can't play via FlutterSound's pcm16WAV codec. Request raw PCM and resample it to 16kHz before wrapping it in a WAV header, matching the local TTS format. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4456550393
commit
5be7f9c078
|
|
@ -366,7 +366,7 @@ async def tts_synthesize_openai(
|
|||
model: str = Query("tts-1", regex="^(tts-1|tts-1-hd|gpt-4o-mini-tts)$"),
|
||||
voice: str = Query("alloy", regex="^(alloy|ash|ballad|coral|echo|fable|nova|onyx|sage|shimmer)$"),
|
||||
):
|
||||
"""Synthesize text to audio via OpenAI TTS API."""
|
||||
"""Synthesize text to audio via OpenAI TTS API, resampled to 16kHz WAV."""
|
||||
client = _get_openai_client()
|
||||
if client is None:
|
||||
return Response(content="OPENAI_API_KEY not configured", status_code=503)
|
||||
|
|
@ -377,9 +377,16 @@ async def tts_synthesize_openai(
|
|||
model=model,
|
||||
voice=voice,
|
||||
input=text,
|
||||
response_format="wav",
|
||||
response_format="pcm", # raw 24kHz 16-bit mono PCM (no header)
|
||||
)
|
||||
return response.content
|
||||
raw_pcm = response.content
|
||||
# Resample 24kHz → 16kHz to match Flutter player expectations
|
||||
audio_np = np.frombuffer(raw_pcm, dtype=np.int16).astype(np.float32)
|
||||
target_len = int(len(audio_np) / 24000 * 16000)
|
||||
indices = np.linspace(0, len(audio_np) - 1, target_len)
|
||||
resampled = np.interp(indices, np.arange(len(audio_np)), audio_np)
|
||||
pcm_16k = resampled.clip(-32768, 32767).astype(np.int16).tobytes()
|
||||
return _make_wav(pcm_16k, 16000)
|
||||
|
||||
try:
|
||||
wav_bytes = await loop.run_in_executor(None, _synth)
|
||||
|
|
|
|||
Loading…
Reference in New Issue