fix: make voice-service startup graceful and fix device config

- Wrap model loading in try/except so the server starts even if models fail to load
- Fix device env var mapping (use the unified 'device' field instead of 'whisper_device')
- Default the Whisper model to 'base' instead of 'large-v3' (3GB) for CPU deployment
- Increase healthcheck start_period to 120s to allow for model download time

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 00:20:12 -08:00 · 2026-02-20 00:20:12 -08:00 · 3702fa3f52
parent d0447fb69f
commit 3702fa3f52
4 changed files with 59 additions and 35 deletions
--- a/deploy/docker/docker-compose.yml
+++ b/deploy/docker/docker-compose.yml
@@ -300,15 +300,15 @@ services:
    environment:
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
      - AGENT_SERVICE_URL=http://agent-service:3002
-      - WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
+      - WHISPER_MODEL=${WHISPER_MODEL:-base}
      - KOKORO_MODEL=${KOKORO_MODEL:-kokoro-82m}
      - DEVICE=${VOICE_DEVICE:-cpu}
    healthcheck:
      test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:3008/docs')\""]
      interval: 30s
-      timeout: 5s
+      timeout: 10s
-      retries: 3
+      retries: 5
-      start_period: 30s
+      start_period: 120s
    depends_on:
      - agent-service
    networks:
--- a/packages/services/voice-service/src/api/main.py
+++ b/packages/services/voice-service/src/api/main.py
@@ -25,25 +25,33 @@ app.include_router(twilio_router, prefix="/api/v1/twilio", tags=["twilio"])
@app.on_event("startup")
 async def startup():
-    """Load models on startup."""
+    """Load models on startup (graceful — server starts even if models fail)."""
    from ..config.settings import settings
    from ..stt.whisper_service import WhisperSTTService
    from ..tts.kokoro_service import KokoroTTSService
    from ..vad.silero_service import SileroVADService
    print("Voice service starting up...")
    print(f"  Device: {settings.device}")
    print(f"  Whisper model: {settings.whisper_model}")
    # Initialize STT service
    try:
        from ..stt.whisper_service import WhisperSTTService
        stt = WhisperSTTService(
            model=settings.whisper_model,
-        device=settings.whisper_device,
+            device=settings.device,
            language=settings.whisper_language,
        )
        await stt.initialize()
        app.state.stt = stt
        print(f"STT model loaded: {settings.whisper_model}")
    except Exception as e:
        app.state.stt = None
        print(f"WARNING: STT model failed to load: {e}")
    # Initialize TTS service
    try:
        from ..tts.kokoro_service import KokoroTTSService
        tts = KokoroTTSService(
            model=settings.kokoro_model,
            voice=settings.kokoro_voice,
@@ -51,12 +59,23 @@ async def startup():
        await tts.initialize()
        app.state.tts = tts
        print(f"TTS model loaded: {settings.kokoro_model}")
    except Exception as e:
        app.state.tts = None
        print(f"WARNING: TTS model failed to load: {e}")
    # Initialize VAD service
    try:
        from ..vad.silero_service import SileroVADService
        vad = SileroVADService()
        await vad.initialize()
        app.state.vad = vad
        print("VAD model loaded: Silero VAD")
    except Exception as e:
        app.state.vad = None
        print(f"WARNING: VAD model failed to load: {e}")
    print("Voice service startup complete.")
@app.on_event("shutdown")
--- a/packages/services/voice-service/src/config/settings.py
+++ b/packages/services/voice-service/src/config/settings.py
@@ -17,14 +17,16 @@ class Settings(BaseSettings):
    agent_service_url: str = "http://agent-service:3002"
    # STT (faster-whisper)
-    whisper_model: str = "large-v3"
+    whisper_model: str = "base"
    whisper_device: str = "cuda"  # "cuda" or "cpu"
    whisper_language: str = "zh"
    # TTS (Kokoro)
    kokoro_model: str = "kokoro-82m"
    kokoro_voice: str = "zh_female_1"
    # Device (cpu or cuda)
    device: str = "cpu"
    # Twilio
    twilio_account_sid: str = ""
    twilio_auth_token: str = ""
--- a/packages/services/voice-service/src/stt/whisper_service.py
+++ b/packages/services/voice-service/src/stt/whisper_service.py
@@ -11,7 +11,7 @@ import numpy as np
 class WhisperSTTService:
    """Speech-to-Text service using faster-whisper."""
-    def __init__(self, model: str = "large-v3", device: str = "cuda", language: str = "zh"):
+    def __init__(self, model: str = "base", device: str = "cpu", language: str = "zh"):
        self.model_name = model
        self.device = device
        self.language = language
@@ -21,11 +21,14 @@ class WhisperSTTService:
        """Load the Whisper model."""
        from faster_whisper import WhisperModel
        compute_type = "float16" if self.device == "cuda" else "int8"
        try:
            self._model = WhisperModel(
-                self.model_name, device=self.device, compute_type="float16"
+                self.model_name, device=self.device, compute_type=compute_type
            )
-        except Exception:
+        except Exception as e:
            print(f"Failed to load Whisper model on {self.device}: {e}")
            if self.device != "cpu":
                self._model = WhisperModel(
                    self.model_name, device="cpu", compute_type="int8"
                )