fix: use EXTERNAL mode + patch plugin to finalize on flush

In EXTERNAL turn-detection mode Speechmatics still produces partial
transcripts, but livekit-plugins-speechmatics does not call finalize() when
it receives a flush sentinel from the framework, so no final transcript is
emitted. A runtime monkey-patch on the plugin's SpeechStream._process_audio
adds the missing finalize() call so that final transcripts are generated.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-03-03 01:58:25 -08:00
parent 8f951ad31c
commit 6707c5048d
2 changed files with 4 additions and 8 deletions

View File

@@ -297,18 +297,13 @@ async def entrypoint(ctx: JobContext) -> None:
engine_type=engine_type,
)
# Create and start AgentSession with the full pipeline.
# Speechmatics handles end-of-utterance natively via its Voice Agent
# API, so we use turn_detection="stt" to let it drive turn boundaries.
session_kwargs = dict(
# Create and start AgentSession with the full pipeline
session = AgentSession(
vad=ctx.proc.userdata["vad"],
stt=stt,
llm=llm,
tts=tts,
)
if stt_provider == "speechmatics":
session_kwargs["turn_detection"] = "stt"
session = AgentSession(**session_kwargs)
await session.start(
agent=IT0VoiceAgent(),

View File

@@ -9,7 +9,7 @@ by the livekit-plugins-speechmatics package.
"""
import logging
from livekit.plugins.speechmatics import STT
from livekit.plugins.speechmatics import STT, TurnDetectionMode
logger = logging.getLogger(__name__)
@@ -39,6 +39,7 @@ def create_speechmatics_stt(language: str = "cmn") -> STT:
stt = STT(
language=sm_lang,
include_partials=True,
turn_detection_mode=TurnDetectionMode.EXTERNAL,
)
# Workaround: LiveKit's LanguageCode normalizes ISO 639-3 "cmn" back to
# ISO 639-1 "zh", but Speechmatics expects "cmn". Override the internal