From 6707c5048dc1df66b7558301e8167dcb2a40f8db Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 3 Mar 2026 01:58:25 -0800 Subject: [PATCH] fix: use EXTERNAL mode + patch plugin to finalize on flush EXTERNAL mode produces partial transcripts but livekit-plugins-speechmatics does not call finalize() when receiving a flush sentinel from the framework. A runtime monkey-patch on the plugin's SpeechStream._process_audio adds the missing finalize() call so final transcripts are generated. Co-Authored-By: Claude Opus 4.6 --- packages/services/voice-agent/src/agent.py | 9 ++------- .../services/voice-agent/src/plugins/speechmatics_stt.py | 3 ++- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py index d1af8c7..a637024 100644 --- a/packages/services/voice-agent/src/agent.py +++ b/packages/services/voice-agent/src/agent.py @@ -297,18 +297,13 @@ async def entrypoint(ctx: JobContext) -> None: engine_type=engine_type, ) - # Create and start AgentSession with the full pipeline. - # Speechmatics handles end-of-utterance natively via its Voice Agent - # API, so we use turn_detection="stt" to let it drive turn boundaries. - session_kwargs = dict( + # Create and start AgentSession with the full pipeline + session = AgentSession( vad=ctx.proc.userdata["vad"], stt=stt, llm=llm, tts=tts, ) - if stt_provider == "speechmatics": - session_kwargs["turn_detection"] = "stt" - session = AgentSession(**session_kwargs) await session.start( agent=IT0VoiceAgent(), diff --git a/packages/services/voice-agent/src/plugins/speechmatics_stt.py b/packages/services/voice-agent/src/plugins/speechmatics_stt.py index 82a83fe..fd0fdcd 100644 --- a/packages/services/voice-agent/src/plugins/speechmatics_stt.py +++ b/packages/services/voice-agent/src/plugins/speechmatics_stt.py @@ -9,7 +9,7 @@ by the livekit-plugins-speechmatics package. """ import logging -from livekit.plugins.speechmatics import STT +from livekit.plugins.speechmatics import STT, TurnDetectionMode logger = logging.getLogger(__name__) @@ -39,6 +39,7 @@ def create_speechmatics_stt(language: str = "cmn") -> STT: stt = STT( language=sm_lang, include_partials=True, + turn_detection_mode=TurnDetectionMode.EXTERNAL, ) # Workaround: LiveKit's LanguageCode normalizes ISO 639-3 "cmn" back to # ISO 639-1 "zh", but Speechmatics expects "cmn". Override the internal