fix: make voice-service startup graceful and fix device config
- Wrap model loading in try/except so the server starts even if models fail
- Fix device env var mapping (unified 'device' field instead of 'whisper_device')
- Default Whisper model to 'base' instead of 'large-v3' (3GB) for CPU deployment
- Increase healthcheck start_period to 120s to allow for model download time

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d0447fb69f
commit
3702fa3f52
|
|
@ -300,15 +300,15 @@ services:
|
||||||
environment:
|
environment:
|
||||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
|
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
|
||||||
- AGENT_SERVICE_URL=http://agent-service:3002
|
- AGENT_SERVICE_URL=http://agent-service:3002
|
||||||
- WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
|
- WHISPER_MODEL=${WHISPER_MODEL:-base}
|
||||||
- KOKORO_MODEL=${KOKORO_MODEL:-kokoro-82m}
|
- KOKORO_MODEL=${KOKORO_MODEL:-kokoro-82m}
|
||||||
- DEVICE=${VOICE_DEVICE:-cpu}
|
- DEVICE=${VOICE_DEVICE:-cpu}
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:3008/docs')\""]
|
test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:3008/docs')\""]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 5s
|
timeout: 10s
|
||||||
retries: 3
|
retries: 5
|
||||||
start_period: 30s
|
start_period: 120s
|
||||||
depends_on:
|
depends_on:
|
||||||
- agent-service
|
- agent-service
|
||||||
networks:
|
networks:
|
||||||
|
|
|
||||||
|
|
@ -25,25 +25,33 @@ app.include_router(twilio_router, prefix="/api/v1/twilio", tags=["twilio"])
|
||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
async def startup():
|
async def startup():
|
||||||
"""Load models on startup."""
|
"""Load models on startup (graceful — server starts even if models fail)."""
|
||||||
from ..config.settings import settings
|
from ..config.settings import settings
|
||||||
from ..stt.whisper_service import WhisperSTTService
|
|
||||||
from ..tts.kokoro_service import KokoroTTSService
|
|
||||||
from ..vad.silero_service import SileroVADService
|
|
||||||
|
|
||||||
print("Voice service starting up...")
|
print("Voice service starting up...")
|
||||||
|
print(f" Device: {settings.device}")
|
||||||
|
print(f" Whisper model: {settings.whisper_model}")
|
||||||
|
|
||||||
# Initialize STT service
|
# Initialize STT service
|
||||||
|
try:
|
||||||
|
from ..stt.whisper_service import WhisperSTTService
|
||||||
|
|
||||||
stt = WhisperSTTService(
|
stt = WhisperSTTService(
|
||||||
model=settings.whisper_model,
|
model=settings.whisper_model,
|
||||||
device=settings.whisper_device,
|
device=settings.device,
|
||||||
language=settings.whisper_language,
|
language=settings.whisper_language,
|
||||||
)
|
)
|
||||||
await stt.initialize()
|
await stt.initialize()
|
||||||
app.state.stt = stt
|
app.state.stt = stt
|
||||||
print(f"STT model loaded: {settings.whisper_model}")
|
print(f"STT model loaded: {settings.whisper_model}")
|
||||||
|
except Exception as e:
|
||||||
|
app.state.stt = None
|
||||||
|
print(f"WARNING: STT model failed to load: {e}")
|
||||||
|
|
||||||
# Initialize TTS service
|
# Initialize TTS service
|
||||||
|
try:
|
||||||
|
from ..tts.kokoro_service import KokoroTTSService
|
||||||
|
|
||||||
tts = KokoroTTSService(
|
tts = KokoroTTSService(
|
||||||
model=settings.kokoro_model,
|
model=settings.kokoro_model,
|
||||||
voice=settings.kokoro_voice,
|
voice=settings.kokoro_voice,
|
||||||
|
|
@ -51,12 +59,23 @@ async def startup():
|
||||||
await tts.initialize()
|
await tts.initialize()
|
||||||
app.state.tts = tts
|
app.state.tts = tts
|
||||||
print(f"TTS model loaded: {settings.kokoro_model}")
|
print(f"TTS model loaded: {settings.kokoro_model}")
|
||||||
|
except Exception as e:
|
||||||
|
app.state.tts = None
|
||||||
|
print(f"WARNING: TTS model failed to load: {e}")
|
||||||
|
|
||||||
# Initialize VAD service
|
# Initialize VAD service
|
||||||
|
try:
|
||||||
|
from ..vad.silero_service import SileroVADService
|
||||||
|
|
||||||
vad = SileroVADService()
|
vad = SileroVADService()
|
||||||
await vad.initialize()
|
await vad.initialize()
|
||||||
app.state.vad = vad
|
app.state.vad = vad
|
||||||
print("VAD model loaded: Silero VAD")
|
print("VAD model loaded: Silero VAD")
|
||||||
|
except Exception as e:
|
||||||
|
app.state.vad = None
|
||||||
|
print(f"WARNING: VAD model failed to load: {e}")
|
||||||
|
|
||||||
|
print("Voice service startup complete.")
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("shutdown")
|
@app.on_event("shutdown")
|
||||||
|
|
|
||||||
|
|
@ -17,14 +17,16 @@ class Settings(BaseSettings):
|
||||||
agent_service_url: str = "http://agent-service:3002"
|
agent_service_url: str = "http://agent-service:3002"
|
||||||
|
|
||||||
# STT (faster-whisper)
|
# STT (faster-whisper)
|
||||||
whisper_model: str = "large-v3"
|
whisper_model: str = "base"
|
||||||
whisper_device: str = "cuda" # "cuda" or "cpu"
|
|
||||||
whisper_language: str = "zh"
|
whisper_language: str = "zh"
|
||||||
|
|
||||||
# TTS (Kokoro)
|
# TTS (Kokoro)
|
||||||
kokoro_model: str = "kokoro-82m"
|
kokoro_model: str = "kokoro-82m"
|
||||||
kokoro_voice: str = "zh_female_1"
|
kokoro_voice: str = "zh_female_1"
|
||||||
|
|
||||||
|
# Device (cpu or cuda)
|
||||||
|
device: str = "cpu"
|
||||||
|
|
||||||
# Twilio
|
# Twilio
|
||||||
twilio_account_sid: str = ""
|
twilio_account_sid: str = ""
|
||||||
twilio_auth_token: str = ""
|
twilio_auth_token: str = ""
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import numpy as np
|
||||||
class WhisperSTTService:
|
class WhisperSTTService:
|
||||||
"""Speech-to-Text service using faster-whisper."""
|
"""Speech-to-Text service using faster-whisper."""
|
||||||
|
|
||||||
def __init__(self, model: str = "large-v3", device: str = "cuda", language: str = "zh"):
|
def __init__(self, model: str = "base", device: str = "cpu", language: str = "zh"):
|
||||||
self.model_name = model
|
self.model_name = model
|
||||||
self.device = device
|
self.device = device
|
||||||
self.language = language
|
self.language = language
|
||||||
|
|
@ -21,11 +21,14 @@ class WhisperSTTService:
|
||||||
"""Load the Whisper model."""
|
"""Load the Whisper model."""
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
compute_type = "float16" if self.device == "cuda" else "int8"
|
||||||
try:
|
try:
|
||||||
self._model = WhisperModel(
|
self._model = WhisperModel(
|
||||||
self.model_name, device=self.device, compute_type="float16"
|
self.model_name, device=self.device, compute_type=compute_type
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception as e:
|
||||||
|
print(f"Failed to load Whisper model on {self.device}: {e}")
|
||||||
|
if self.device != "cpu":
|
||||||
self._model = WhisperModel(
|
self._model = WhisperModel(
|
||||||
self.model_name, device="cpu", compute_type="int8"
|
self.model_name, device="cpu", compute_type="int8"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue