feat: add temporary TTS test page at /api/v1/test/tts
Adds a browser-accessible page for testing text-to-speech synthesis without going through the full voice pipeline.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
740f8f5f88
commit
0aa20cbc73
|
|
@ -10,6 +10,7 @@ from ..config.settings import settings
|
||||||
from .health import router as health_router
|
from .health import router as health_router
|
||||||
from .session_router import router as session_router
|
from .session_router import router as session_router
|
||||||
from .twilio_webhook import router as twilio_router
|
from .twilio_webhook import router as twilio_router
|
||||||
|
from .test_tts import router as test_tts_router
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -29,6 +30,7 @@ app.add_middleware(
|
||||||
app.include_router(health_router, tags=["health"])
|
app.include_router(health_router, tags=["health"])
|
||||||
app.include_router(session_router, prefix="/api/v1/voice", tags=["sessions"])
|
app.include_router(session_router, prefix="/api/v1/voice", tags=["sessions"])
|
||||||
app.include_router(twilio_router, prefix="/api/v1/twilio", tags=["twilio"])
|
app.include_router(twilio_router, prefix="/api/v1/twilio", tags=["twilio"])
|
||||||
|
app.include_router(test_tts_router, prefix="/api/v1/test", tags=["test"])
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
"""Temporary TTS test endpoint — hit /api/v1/test/tts in a browser to verify."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import io
|
||||||
|
import struct
|
||||||
|
import numpy as np
|
||||||
|
from fastapi import APIRouter, Request, Query
|
||||||
|
from fastapi.responses import HTMLResponse, Response
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
_SAMPLE_RATE = 24000  # Kokoro native output rate in Hz; source rate for the 24 kHz -> 16 kHz resample below
|
||||||
|
|
||||||
|
|
||||||
|
def _make_wav(pcm_bytes: bytes, sample_rate: int = 16000) -> bytes:
|
||||||
|
"""Wrap raw 16-bit PCM into a WAV container."""
|
||||||
|
buf = io.BytesIO()
|
||||||
|
num_samples = len(pcm_bytes) // 2
|
||||||
|
data_size = num_samples * 2
|
||||||
|
# WAV header
|
||||||
|
buf.write(b"RIFF")
|
||||||
|
buf.write(struct.pack("<I", 36 + data_size))
|
||||||
|
buf.write(b"WAVE")
|
||||||
|
buf.write(b"fmt ")
|
||||||
|
buf.write(struct.pack("<I", 16)) # chunk size
|
||||||
|
buf.write(struct.pack("<H", 1)) # PCM
|
||||||
|
buf.write(struct.pack("<H", 1)) # mono
|
||||||
|
buf.write(struct.pack("<I", sample_rate)) # sample rate
|
||||||
|
buf.write(struct.pack("<I", sample_rate * 2)) # byte rate
|
||||||
|
buf.write(struct.pack("<H", 2)) # block align
|
||||||
|
buf.write(struct.pack("<H", 16)) # bits per sample
|
||||||
|
buf.write(b"data")
|
||||||
|
buf.write(struct.pack("<I", data_size))
|
||||||
|
buf.write(pcm_bytes)
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/tts", response_class=HTMLResponse)
async def tts_test_page():
    """Serve a minimal HTML page for trying TTS from a browser.

    The page has a textarea and a button. Clicking the button fetches
    ``/api/v1/test/tts/synthesize?text=...`` and plays the returned audio in
    an ``<audio>`` element, showing elapsed time and payload size.

    The script revokes the previous blob URL before creating a new one so
    repeated syntheses do not accumulate audio blobs in the tab, and it
    swallows a rejected ``play()`` promise (e.g. autoplay blocked) because
    the visible player controls still let the user start playback.

    Returns:
        The HTML document as a string (sent as ``text/html``).
    """
    return """<!DOCTYPE html>
<html><head><meta charset="utf-8"><title>TTS Test</title>
<style>
body { font-family: sans-serif; max-width: 600px; margin: 40px auto; padding: 0 20px; }
textarea { width: 100%; height: 80px; font-size: 16px; }
button { font-size: 18px; padding: 10px 30px; margin-top: 10px; cursor: pointer; }
#status { margin-top: 15px; color: #666; }
audio { margin-top: 15px; width: 100%; }
</style></head>
<body>
<h2>TTS Test</h2>
<textarea id="text" placeholder="输入要合成的文本...">你好,我是IT0运维助手。很高兴为您服务!</textarea>
<br><button onclick="doTTS()">合成语音</button>
<div id="status"></div>
<audio id="player" controls style="display:none"></audio>
<script>
let lastUrl = null;
async function doTTS() {
const text = document.getElementById('text').value.trim();
if (!text) return;
const status = document.getElementById('status');
const player = document.getElementById('player');
status.textContent = '合成中...';
player.style.display = 'none';
const t0 = Date.now();
try {
const resp = await fetch('/api/v1/test/tts/synthesize?text=' + encodeURIComponent(text));
if (!resp.ok) { status.textContent = 'Error: ' + resp.status; return; }
const blob = await resp.blob();
const ms = Date.now() - t0;
status.textContent = '完成!耗时 ' + ms + 'ms,大小 ' + (blob.size/1024).toFixed(1) + 'KB';
if (lastUrl) { URL.revokeObjectURL(lastUrl); }
lastUrl = URL.createObjectURL(blob);
player.src = lastUrl;
player.style.display = 'block';
player.play().catch(() => {});
} catch(e) { status.textContent = 'Error: ' + e.message; }
}
</script>
</body></html>"""
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/tts/synthesize")
async def tts_synthesize(request: Request, text: str = Query(..., min_length=1, max_length=500)):
    """Synthesize ``text`` and return it as a 16 kHz mono 16-bit WAV response.

    Args:
        request: Incoming request; the TTS service is read from
            ``request.app.state.tts``.
        text: Text to synthesize; the ``Query`` constraints limit it to
            1-500 characters.

    Returns:
        An ``audio/wav`` response on success, a 503 response when no TTS
        pipeline is loaded, or a 500 response when synthesis yields no audio.
    """
    tts = getattr(request.app.state, "tts", None)
    if tts is None or tts._pipeline is None:
        return Response(content="TTS model not loaded", status_code=503)

    target_rate = 16000

    def _synth() -> bytes:
        """Run the pipeline and return WAV bytes, or ``b""`` if there is no audio."""
        chunks = []
        for _, _, audio in tts._pipeline(text, voice=tts.voice):
            # NOTE(review): assumes the pipeline may yield a chunk without
            # audio; skip it instead of crashing in np.concatenate. Confirm
            # against the pipeline's contract.
            if audio is None:
                continue
            chunks.append(audio.numpy() if hasattr(audio, "numpy") else audio)
        if not chunks:
            return b""

        audio_np = np.concatenate(chunks)
        target_len = int(len(audio_np) / _SAMPLE_RATE * target_rate)
        # Zero-length (or sub-sample) audio cannot be interpolated and would
        # otherwise produce a header-only WAV; report it as "no audio".
        if target_len == 0:
            return b""

        # Linear-interpolation resample from the model rate to target_rate.
        source_positions = np.arange(len(audio_np))
        wanted_positions = np.linspace(0, len(audio_np) - 1, target_len)
        resampled = np.interp(wanted_positions, source_positions, audio_np)

        # Scale float samples to int16, clipping to the valid range.
        pcm = (resampled * 32768).clip(-32768, 32767).astype(np.int16).tobytes()
        return _make_wav(pcm, target_rate)

    # Synthesis is CPU-bound; run it in the default executor so the event
    # loop stays responsive.
    loop = asyncio.get_running_loop()
    wav_bytes = await loop.run_in_executor(None, _synth)
    if not wav_bytes:
        return Response(content="TTS produced no audio", status_code=500)

    return Response(content=wav_bytes, media_type="audio/wav")
|
||||||
Loading…
Reference in New Issue