diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py index a637024..d1af8c7 100644 --- a/packages/services/voice-agent/src/agent.py +++ b/packages/services/voice-agent/src/agent.py @@ -297,13 +297,18 @@ async def entrypoint(ctx: JobContext) -> None: engine_type=engine_type, ) - # Create and start AgentSession with the full pipeline - session = AgentSession( + # Create and start AgentSession with the full pipeline. + # Speechmatics handles end-of-utterance natively via its Voice Agent + # API, so we use turn_detection="stt" to let it drive turn boundaries. + session_kwargs = dict( vad=ctx.proc.userdata["vad"], stt=stt, llm=llm, tts=tts, ) + if stt_provider == "speechmatics": + session_kwargs["turn_detection"] = "stt" + session = AgentSession(**session_kwargs) await session.start( agent=IT0VoiceAgent(), diff --git a/packages/services/voice-agent/src/plugins/speechmatics_stt.py b/packages/services/voice-agent/src/plugins/speechmatics_stt.py index fe500da..82a83fe 100644 --- a/packages/services/voice-agent/src/plugins/speechmatics_stt.py +++ b/packages/services/voice-agent/src/plugins/speechmatics_stt.py @@ -9,7 +9,7 @@ by the livekit-plugins-speechmatics package. """ import logging -from livekit.plugins.speechmatics import STT, TurnDetectionMode +from livekit.plugins.speechmatics import STT logger = logging.getLogger(__name__) @@ -39,14 +39,10 @@ def create_speechmatics_stt(language: str = "cmn") -> STT: stt = STT( language=sm_lang, include_partials=True, - # Use EXTERNAL turn detection so LiveKit's own VAD handles turn - # boundaries. ADAPTIVE enables a second client-side Silero VAD inside - # the Speechmatics SDK which conflicts with LiveKit's pipeline. - turn_detection_mode=TurnDetectionMode.EXTERNAL, ) # Workaround: LiveKit's LanguageCode normalizes ISO 639-3 "cmn" back to # ISO 639-1 "zh", but Speechmatics expects "cmn". Override the internal # option after construction so the raw Speechmatics code is sent. stt._stt_options.language = sm_lang # type: ignore[assignment] - logger.info("Speechmatics STT created: language=%s (input=%s), turn_detection=EXTERNAL", sm_lang, language) + logger.info("Speechmatics STT created: language=%s (input=%s)", sm_lang, language) return stt