diff --git a/packages/services/voice-service/src/api/main.py b/packages/services/voice-service/src/api/main.py
index f98749d..02530b8 100644
--- a/packages/services/voice-service/src/api/main.py
+++ b/packages/services/voice-service/src/api/main.py
@@ -1,3 +1,5 @@
+import threading
+
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
@@ -23,62 +25,86 @@
 app.include_router(session_router, prefix="/api/v1/voice", tags=["sessions"])
 app.include_router(twilio_router, prefix="/api/v1/twilio", tags=["twilio"])
 
-@app.on_event("startup")
-async def startup():
-    """Load models on startup (graceful — server starts even if models fail)."""
+def _load_models_sync():
+    """Load ML models in a background thread (all blocking calls)."""
     from ..config.settings import settings
 
-    print("Voice service starting up...")
-    print(f"  Device: {settings.device}")
-    print(f"  Whisper model: {settings.whisper_model}")
+    def _p(msg):
+        print(msg, flush=True)
 
-    # Initialize STT service
+    _p(f"[bg] Loading models (device={settings.device}, whisper={settings.whisper_model})...")
+
+    # STT
     try:
         from ..stt.whisper_service import WhisperSTTService
+        from faster_whisper import WhisperModel
 
         stt = WhisperSTTService(
             model=settings.whisper_model,
             device=settings.device,
             language=settings.whisper_language,
         )
-        await stt.initialize()
+        compute_type = "float16" if settings.device == "cuda" else "int8"
+        try:
+            stt._model = WhisperModel(stt.model_name, device=stt.device, compute_type=compute_type)
+        except Exception as e:
+            _p(f"[bg] Whisper fallback to CPU: {e}")
+            if stt.device != "cpu":
+                stt._model = WhisperModel(stt.model_name, device="cpu", compute_type="int8")
         app.state.stt = stt
-        print(f"STT model loaded: {settings.whisper_model}")
+        _p(f"[bg] STT loaded: {settings.whisper_model}")
     except Exception as e:
         app.state.stt = None
-        print(f"WARNING: STT model failed to load: {e}")
+        _p(f"[bg] WARNING: STT failed: {e}")
 
-    # Initialize TTS service
+    # TTS
     try:
         from ..tts.kokoro_service import KokoroTTSService
+        from kokoro import KPipeline
 
-        tts = KokoroTTSService(
-            model=settings.kokoro_model,
-            voice=settings.kokoro_voice,
-        )
-        await tts.initialize()
+        tts = KokoroTTSService(model=settings.kokoro_model, voice=settings.kokoro_voice)
+        tts._pipeline = KPipeline(lang_code='z')
         app.state.tts = tts
-        print(f"TTS model loaded: {settings.kokoro_model}")
+        _p(f"[bg] TTS loaded: {settings.kokoro_model}")
     except Exception as e:
         app.state.tts = None
-        print(f"WARNING: TTS model failed to load: {e}")
+        _p(f"[bg] WARNING: TTS failed: {e}")
 
-    # Initialize VAD service
+    # VAD
     try:
         from ..vad.silero_service import SileroVADService
+        import torch
 
         vad = SileroVADService()
-        await vad.initialize()
+        vad._model, vad._utils = torch.hub.load(
+            repo_or_dir='snakers4/silero-vad', model='silero_vad', force_reload=False
+        )
         app.state.vad = vad
-        print("VAD model loaded: Silero VAD")
+        _p("[bg] VAD loaded: Silero VAD")
     except Exception as e:
         app.state.vad = None
-        print(f"WARNING: VAD model failed to load: {e}")
+        _p(f"[bg] WARNING: VAD failed: {e}")
 
-    print("Voice service startup complete.")
+    _p("[bg] Model loading complete.")
+
+
+@app.on_event("startup")
+async def startup():
+    """Start server immediately, load models in background thread."""
+    print("Voice service starting up...", flush=True)
+
+    app.state.stt = None
+    app.state.tts = None
+    app.state.vad = None
+
+    # Load models in background thread so server responds to healthchecks immediately
+    thread = threading.Thread(target=_load_models_sync, daemon=True)
+    thread.start()
+
+    print("Voice service ready (models loading in background).", flush=True)
 
 
 @app.on_event("shutdown")
 async def shutdown():
     """Cleanup on shutdown."""
-    print("Voice service shutting down...")
+    print("Voice service shutting down...", flush=True)