From db4e70e30ceb1869217c640a369ba3baf24e50e9 Mon Sep 17 00:00:00 2001
From: hailin
Date: Tue, 3 Mar 2026 01:31:33 -0800
Subject: [PATCH] fix: use EXTERNAL turn detection for Speechmatics in LiveKit pipeline

ADAPTIVE mode enables a second client-side Silero VAD inside the
Speechmatics SDK that conflicts with LiveKit's own VAD pipeline,
causing no transcription to be returned. EXTERNAL mode delegates
turn detection to LiveKit.

Co-Authored-By: Claude Opus 4.6
---
 .../voice-agent/src/plugins/speechmatics_stt.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/packages/services/voice-agent/src/plugins/speechmatics_stt.py b/packages/services/voice-agent/src/plugins/speechmatics_stt.py
index 6219073..fe500da 100644
--- a/packages/services/voice-agent/src/plugins/speechmatics_stt.py
+++ b/packages/services/voice-agent/src/plugins/speechmatics_stt.py
@@ -9,7 +9,7 @@ by the livekit-plugins-speechmatics package.
 """
 import logging
 
-from livekit.plugins import speechmatics
+from livekit.plugins.speechmatics import STT, TurnDetectionMode
 
 logger = logging.getLogger(__name__)
 
@@ -25,7 +25,7 @@ _LANG_MAP = {
 }
 
 
-def create_speechmatics_stt(language: str = "cmn") -> speechmatics.STT:
+def create_speechmatics_stt(language: str = "cmn") -> STT:
     """Create a Speechmatics STT instance for the voice pipeline.
 
     Args:
@@ -36,13 +36,17 @@ def create_speechmatics_stt(language: str = "cmn") -> speechmatics.STT:
         Configured speechmatics.STT instance.
     """
     sm_lang = _LANG_MAP.get(language, language)
-    stt = speechmatics.STT(
+    stt = STT(
         language=sm_lang,
         include_partials=True,
+        # Use EXTERNAL turn detection so LiveKit's own VAD handles turn
+        # boundaries. ADAPTIVE enables a second client-side Silero VAD inside
+        # the Speechmatics SDK which conflicts with LiveKit's pipeline.
+        turn_detection_mode=TurnDetectionMode.EXTERNAL,
     )
     # Workaround: LiveKit's LanguageCode normalizes ISO 639-3 "cmn" back to
     # ISO 639-1 "zh", but Speechmatics expects "cmn". Override the internal
     # option after construction so the raw Speechmatics code is sent.
     stt._stt_options.language = sm_lang # type: ignore[assignment]
-    logger.info("Speechmatics STT created: language=%s (input=%s)", sm_lang, language)
+    logger.info("Speechmatics STT created: language=%s (input=%s), turn_detection=EXTERNAL", sm_lang, language)
     return stt