fix: make voice-service startup graceful and fix device config

- Wrap model loading in try/except so server starts even if models fail
- Fix device env var mapping (use a unified 'device' field, matching the DEVICE env var, instead of 'whisper_device')
- Default Whisper model to 'base' instead of 'large-v3' (3GB) for CPU deployment
- Increase healthcheck start_period to 120s (and timeout to 10s, retries to 5) to allow for model download time

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-20 00:20:12 -08:00
parent d0447fb69f
commit 3702fa3f52
4 changed files with 59 additions and 35 deletions

View File

@ -300,15 +300,15 @@ services:
environment: environment:
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- AGENT_SERVICE_URL=http://agent-service:3002 - AGENT_SERVICE_URL=http://agent-service:3002
- WHISPER_MODEL=${WHISPER_MODEL:-large-v3} - WHISPER_MODEL=${WHISPER_MODEL:-base}
- KOKORO_MODEL=${KOKORO_MODEL:-kokoro-82m} - KOKORO_MODEL=${KOKORO_MODEL:-kokoro-82m}
- DEVICE=${VOICE_DEVICE:-cpu} - DEVICE=${VOICE_DEVICE:-cpu}
healthcheck: healthcheck:
test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:3008/docs')\""] test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:3008/docs')\""]
interval: 30s interval: 30s
timeout: 5s timeout: 10s
retries: 3 retries: 5
start_period: 30s start_period: 120s
depends_on: depends_on:
- agent-service - agent-service
networks: networks:

View File

@ -25,25 +25,33 @@ app.include_router(twilio_router, prefix="/api/v1/twilio", tags=["twilio"])
@app.on_event("startup") @app.on_event("startup")
async def startup(): async def startup():
"""Load models on startup.""" """Load models on startup (graceful — server starts even if models fail)."""
from ..config.settings import settings from ..config.settings import settings
from ..stt.whisper_service import WhisperSTTService
from ..tts.kokoro_service import KokoroTTSService
from ..vad.silero_service import SileroVADService
print("Voice service starting up...") print("Voice service starting up...")
print(f" Device: {settings.device}")
print(f" Whisper model: {settings.whisper_model}")
# Initialize STT service # Initialize STT service
try:
from ..stt.whisper_service import WhisperSTTService
stt = WhisperSTTService( stt = WhisperSTTService(
model=settings.whisper_model, model=settings.whisper_model,
device=settings.whisper_device, device=settings.device,
language=settings.whisper_language, language=settings.whisper_language,
) )
await stt.initialize() await stt.initialize()
app.state.stt = stt app.state.stt = stt
print(f"STT model loaded: {settings.whisper_model}") print(f"STT model loaded: {settings.whisper_model}")
except Exception as e:
app.state.stt = None
print(f"WARNING: STT model failed to load: {e}")
# Initialize TTS service # Initialize TTS service
try:
from ..tts.kokoro_service import KokoroTTSService
tts = KokoroTTSService( tts = KokoroTTSService(
model=settings.kokoro_model, model=settings.kokoro_model,
voice=settings.kokoro_voice, voice=settings.kokoro_voice,
@ -51,12 +59,23 @@ async def startup():
await tts.initialize() await tts.initialize()
app.state.tts = tts app.state.tts = tts
print(f"TTS model loaded: {settings.kokoro_model}") print(f"TTS model loaded: {settings.kokoro_model}")
except Exception as e:
app.state.tts = None
print(f"WARNING: TTS model failed to load: {e}")
# Initialize VAD service # Initialize VAD service
try:
from ..vad.silero_service import SileroVADService
vad = SileroVADService() vad = SileroVADService()
await vad.initialize() await vad.initialize()
app.state.vad = vad app.state.vad = vad
print("VAD model loaded: Silero VAD") print("VAD model loaded: Silero VAD")
except Exception as e:
app.state.vad = None
print(f"WARNING: VAD model failed to load: {e}")
print("Voice service startup complete.")
@app.on_event("shutdown") @app.on_event("shutdown")

View File

@ -17,14 +17,16 @@ class Settings(BaseSettings):
agent_service_url: str = "http://agent-service:3002" agent_service_url: str = "http://agent-service:3002"
# STT (faster-whisper) # STT (faster-whisper)
whisper_model: str = "large-v3" whisper_model: str = "base"
whisper_device: str = "cuda" # "cuda" or "cpu"
whisper_language: str = "zh" whisper_language: str = "zh"
# TTS (Kokoro) # TTS (Kokoro)
kokoro_model: str = "kokoro-82m" kokoro_model: str = "kokoro-82m"
kokoro_voice: str = "zh_female_1" kokoro_voice: str = "zh_female_1"
# Device (cpu or cuda)
device: str = "cpu"
# Twilio # Twilio
twilio_account_sid: str = "" twilio_account_sid: str = ""
twilio_auth_token: str = "" twilio_auth_token: str = ""

View File

@ -11,7 +11,7 @@ import numpy as np
class WhisperSTTService: class WhisperSTTService:
"""Speech-to-Text service using faster-whisper.""" """Speech-to-Text service using faster-whisper."""
def __init__(self, model: str = "large-v3", device: str = "cuda", language: str = "zh"): def __init__(self, model: str = "base", device: str = "cpu", language: str = "zh"):
self.model_name = model self.model_name = model
self.device = device self.device = device
self.language = language self.language = language
@ -21,11 +21,14 @@ class WhisperSTTService:
"""Load the Whisper model.""" """Load the Whisper model."""
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
compute_type = "float16" if self.device == "cuda" else "int8"
try: try:
self._model = WhisperModel( self._model = WhisperModel(
self.model_name, device=self.device, compute_type="float16" self.model_name, device=self.device, compute_type=compute_type
) )
except Exception: except Exception as e:
print(f"Failed to load Whisper model on {self.device}: {e}")
if self.device != "cpu":
self._model = WhisperModel( self._model = WhisperModel(
self.model_name, device="cpu", compute_type="int8" self.model_name, device="cpu", compute_type="int8"
) )