From 0aa20cbc733ecc96294172883cae9540dbc8f65d Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 24 Feb 2026 05:06:02 -0800 Subject: [PATCH] feat: add temporary TTS test page at /api/v1/test/tts Browser-accessible page to test text-to-speech synthesis without going through the full voice pipeline. Co-Authored-By: Claude Opus 4.6 --- .../services/voice-service/src/api/main.py | 2 + .../voice-service/src/api/test_tts.py | 110 ++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 packages/services/voice-service/src/api/test_tts.py diff --git a/packages/services/voice-service/src/api/main.py b/packages/services/voice-service/src/api/main.py index 7a78050..18b4f7f 100644 --- a/packages/services/voice-service/src/api/main.py +++ b/packages/services/voice-service/src/api/main.py @@ -10,6 +10,7 @@ from ..config.settings import settings from .health import router as health_router from .session_router import router as session_router from .twilio_webhook import router as twilio_router +from .test_tts import router as test_tts_router logger = logging.getLogger(__name__) @@ -29,6 +30,7 @@ app.add_middleware( app.include_router(health_router, tags=["health"]) app.include_router(session_router, prefix="/api/v1/voice", tags=["sessions"]) app.include_router(twilio_router, prefix="/api/v1/twilio", tags=["twilio"]) +app.include_router(test_tts_router, prefix="/api/v1/test", tags=["test"]) # --------------------------------------------------------------------------- diff --git a/packages/services/voice-service/src/api/test_tts.py b/packages/services/voice-service/src/api/test_tts.py new file mode 100644 index 0000000..448520f --- /dev/null +++ b/packages/services/voice-service/src/api/test_tts.py @@ -0,0 +1,110 @@ +"""Temporary TTS test endpoint — hit /api/v1/test/tts in a browser to verify.""" + +import asyncio +import io +import struct +import numpy as np +from fastapi import APIRouter, Request, Query +from fastapi.responses import HTMLResponse, Response + +router = 
APIRouter() + +_SAMPLE_RATE = 24000 # Kokoro native output rate + + +def _make_wav(pcm_bytes: bytes, sample_rate: int = 16000) -> bytes: + """Wrap raw 16-bit PCM into a WAV container.""" + buf = io.BytesIO() + num_samples = len(pcm_bytes) // 2 + data_size = num_samples * 2 + # WAV header + buf.write(b"RIFF") + buf.write(struct.pack(" +TTS Test + + +

TTS Test

+ +
+
@router.get("/tts/synthesize")
async def tts_synthesize(request: Request, text: str = Query(..., min_length=1, max_length=500)):
    """Synthesize ``text`` and return it as a 16 kHz, 16-bit PCM WAV response.

    The TTS engine is read from ``request.app.state.tts``. Synthesis and
    resampling run in the event loop's default executor so the (CPU-bound)
    work does not block other requests.

    Args:
        request: Incoming request; used only to reach ``app.state.tts``.
        text: Text to speak (1-500 characters, enforced by ``Query``).

    Returns:
        An ``audio/wav`` response on success, a 503 response when no TTS
        engine or pipeline is available, or a 500 response when synthesis
        yields no usable audio samples.
    """
    tts = getattr(request.app.state, "tts", None)
    if tts is None or tts._pipeline is None:
        return Response(content="TTS model not loaded", status_code=503)

    output_rate = 16000  # rate of the WAV handed back to the browser

    def _synth() -> bytes:
        """Run the pipeline and return WAV bytes, or ``b""`` if there is no audio."""
        chunks = []
        for _, _, audio in tts._pipeline(text, voice=tts.voice):
            # NOTE(review): the pipeline presumably may yield a chunk without
            # audio; skipping it avoids np.concatenate failing on None.
            if audio is None:
                continue
            # Tensor-like chunks expose .numpy(); plain arrays are used as-is.
            if hasattr(audio, "numpy"):
                audio = audio.numpy()
            chunks.append(audio)
        if not chunks:
            return b""

        audio_np = np.concatenate(chunks)

        # Resample from the pipeline rate (_SAMPLE_RATE) to output_rate by
        # linear interpolation.
        target_len = int(len(audio_np) / _SAMPLE_RATE * output_rate)
        if target_len <= 0:
            # Too few samples to produce even one output sample. Without this
            # guard np.interp raises on empty input, or a header-only WAV
            # would be returned as if synthesis had succeeded.
            return b""
        indices = np.linspace(0, len(audio_np) - 1, target_len)
        resampled = np.interp(indices, np.arange(len(audio_np)), audio_np)

        # Scale float samples to int16, clipping to avoid wrap-around.
        pcm = (resampled * 32768).clip(-32768, 32767).astype(np.int16).tobytes()
        return _make_wav(pcm, output_rate)

    # get_running_loop() is the documented way to obtain the loop from inside
    # a coroutine; passing None selects the loop's default executor.
    loop = asyncio.get_running_loop()
    wav_bytes = await loop.run_in_executor(None, _synth)
    if not wav_bytes:
        return Response(content="TTS produced no audio", status_code=500)

    return Response(content=wav_bytes, media_type="audio/wav")