fix: load voice models in background thread to unblock startup
Model downloads (Whisper, Kokoro, Silero VAD) are synchronous blocking calls that prevent uvicorn from completing startup and responding to healthchecks. Move all model loading to a daemon thread so the server starts immediately. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3702fa3f52
commit
a06b489a1e
|
|
@ -1,3 +1,5 @@
|
|||
import threading
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
|
|
@ -23,62 +25,86 @@ app.include_router(session_router, prefix="/api/v1/voice", tags=["sessions"])
|
|||
app.include_router(twilio_router, prefix="/api/v1/twilio", tags=["twilio"])
|
||||
|
||||
|
||||
def _load_models_sync():
    """Load ML models in a background thread (all blocking calls).

    Runs off the event loop so uvicorn can finish startup and answer
    healthchecks while the (potentially slow) model downloads proceed.
    Each model is loaded independently: on failure its ``app.state``
    slot stays ``None`` and the service keeps running in degraded mode.
    """
    from ..config.settings import settings

    def _p(msg):
        # flush=True so progress shows up immediately in container logs
        print(msg, flush=True)

    _p(f"[bg] Loading models (device={settings.device}, whisper={settings.whisper_model})...")

    # STT (faster-whisper)
    try:
        from ..stt.whisper_service import WhisperSTTService
        from faster_whisper import WhisperModel

        stt = WhisperSTTService(
            model=settings.whisper_model,
            device=settings.device,
            language=settings.whisper_language,
        )
        # float16 only makes sense on CUDA; int8 is the safe CPU default
        compute_type = "float16" if settings.device == "cuda" else "int8"
        try:
            stt._model = WhisperModel(stt.model_name, device=stt.device, compute_type=compute_type)
        except Exception as e:
            # GPU load can fail (driver/VRAM); retry once on CPU. If we
            # were already on CPU there is no fallback left — re-raise so
            # the outer handler marks STT unavailable instead of
            # registering a service whose _model was never assigned.
            if stt.device == "cpu":
                raise
            _p(f"[bg] Whisper fallback to CPU: {e}")
            stt._model = WhisperModel(stt.model_name, device="cpu", compute_type="int8")
        app.state.stt = stt
        _p(f"[bg] STT loaded: {settings.whisper_model}")
    except Exception as e:
        app.state.stt = None
        _p(f"[bg] WARNING: STT failed: {e}")

    # TTS (Kokoro)
    try:
        from ..tts.kokoro_service import KokoroTTSService
        from kokoro import KPipeline

        tts = KokoroTTSService(model=settings.kokoro_model, voice=settings.kokoro_voice)
        tts._pipeline = KPipeline(lang_code='z')
        app.state.tts = tts
        _p(f"[bg] TTS loaded: {settings.kokoro_model}")
    except Exception as e:
        app.state.tts = None
        _p(f"[bg] WARNING: TTS failed: {e}")

    # VAD (Silero, fetched via torch.hub)
    try:
        from ..vad.silero_service import SileroVADService
        import torch

        vad = SileroVADService()
        vad._model, vad._utils = torch.hub.load(
            repo_or_dir='snakers4/silero-vad', model='silero_vad', force_reload=False
        )
        app.state.vad = vad
        _p("[bg] VAD loaded: Silero VAD")
    except Exception as e:
        app.state.vad = None
        _p(f"[bg] WARNING: VAD failed: {e}")

    _p("[bg] Model loading complete.")
||||
@app.on_event("startup")
async def startup():
    """Return from startup immediately; load models on a daemon thread."""
    print("Voice service starting up...", flush=True)

    # Seed placeholders so request handlers can detect "model not ready yet".
    for slot in ("stt", "tts", "vad"):
        setattr(app.state, slot, None)

    # Daemon thread: the blocking downloads must not delay healthchecks,
    # and the thread must not keep the interpreter alive at exit.
    loader = threading.Thread(target=_load_models_sync, daemon=True)
    loader.start()

    print("Voice service ready (models loading in background).", flush=True)
||||
@app.on_event("shutdown")
async def shutdown():
    """Log shutdown; model loader is a daemon thread, nothing to join."""
    print("Voice service shutting down...", flush=True)
|
|
|||
Loading…
Reference in New Issue