fix: use turn_detection=stt for Speechmatics per official docs

Speechmatics handles end-of-utterance natively via its Voice Agent
API (ADAPTIVE mode). Use turn_detection="stt" on AgentSession so
LiveKit delegates turn boundaries to the STT engine rather than
running its own VAD-based turn detection in conflict with it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-03-03 01:44:10 -08:00
parent db4e70e30c
commit 8f951ad31c
2 changed files with 9 additions and 8 deletions

View File

@ -297,13 +297,18 @@ async def entrypoint(ctx: JobContext) -> None:
engine_type=engine_type, engine_type=engine_type,
) )
# Create and start AgentSession with the full pipeline # Create and start AgentSession with the full pipeline.
session = AgentSession( # Speechmatics handles end-of-utterance natively via its Voice Agent
# API, so we use turn_detection="stt" to let it drive turn boundaries.
session_kwargs = dict(
vad=ctx.proc.userdata["vad"], vad=ctx.proc.userdata["vad"],
stt=stt, stt=stt,
llm=llm, llm=llm,
tts=tts, tts=tts,
) )
if stt_provider == "speechmatics":
session_kwargs["turn_detection"] = "stt"
session = AgentSession(**session_kwargs)
await session.start( await session.start(
agent=IT0VoiceAgent(), agent=IT0VoiceAgent(),

View File

@ -9,7 +9,7 @@ by the livekit-plugins-speechmatics package.
""" """
import logging import logging
from livekit.plugins.speechmatics import STT, TurnDetectionMode from livekit.plugins.speechmatics import STT
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -39,14 +39,10 @@ def create_speechmatics_stt(language: str = "cmn") -> STT:
stt = STT( stt = STT(
language=sm_lang, language=sm_lang,
include_partials=True, include_partials=True,
# Use EXTERNAL turn detection so LiveKit's own VAD handles turn
# boundaries. ADAPTIVE enables a second client-side Silero VAD inside
# the Speechmatics SDK which conflicts with LiveKit's pipeline.
turn_detection_mode=TurnDetectionMode.EXTERNAL,
) )
# Workaround: LiveKit's LanguageCode normalizes ISO 639-3 "cmn" back to # Workaround: LiveKit's LanguageCode normalizes ISO 639-3 "cmn" back to
# ISO 639-1 "zh", but Speechmatics expects "cmn". Override the internal # ISO 639-1 "zh", but Speechmatics expects "cmn". Override the internal
# option after construction so the raw Speechmatics code is sent. # option after construction so the raw Speechmatics code is sent.
stt._stt_options.language = sm_lang # type: ignore[assignment] stt._stt_options.language = sm_lang # type: ignore[assignment]
logger.info("Speechmatics STT created: language=%s (input=%s), turn_detection=EXTERNAL", sm_lang, language) logger.info("Speechmatics STT created: language=%s (input=%s)", sm_lang, language)
return stt return stt