fix: use turn_detection=stt for Speechmatics per official docs
Speechmatics handles end-of-utterance natively via its Voice Agent API (ADAPTIVE mode). Use turn_detection="stt" on AgentSession so LiveKit delegates turn boundaries to the STT engine instead of conflicting with its own VAD-based turn detection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
db4e70e30c
commit
8f951ad31c
|
|
@@ -297,13 +297,18 @@ async def entrypoint(ctx: JobContext) -> None:
|
||||||
engine_type=engine_type,
|
engine_type=engine_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create and start AgentSession with the full pipeline
|
# Create and start AgentSession with the full pipeline.
|
||||||
session = AgentSession(
|
# Speechmatics handles end-of-utterance natively via its Voice Agent
|
||||||
|
# API, so we use turn_detection="stt" to let it drive turn boundaries.
|
||||||
|
session_kwargs = dict(
|
||||||
vad=ctx.proc.userdata["vad"],
|
vad=ctx.proc.userdata["vad"],
|
||||||
stt=stt,
|
stt=stt,
|
||||||
llm=llm,
|
llm=llm,
|
||||||
tts=tts,
|
tts=tts,
|
||||||
)
|
)
|
||||||
|
if stt_provider == "speechmatics":
|
||||||
|
session_kwargs["turn_detection"] = "stt"
|
||||||
|
session = AgentSession(**session_kwargs)
|
||||||
|
|
||||||
await session.start(
|
await session.start(
|
||||||
agent=IT0VoiceAgent(),
|
agent=IT0VoiceAgent(),
|
||||||
|
|
|
||||||
|
|
@@ -9,7 +9,7 @@ by the livekit-plugins-speechmatics package.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from livekit.plugins.speechmatics import STT, TurnDetectionMode
|
from livekit.plugins.speechmatics import STT
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@@ -39,14 +39,10 @@ def create_speechmatics_stt(language: str = "cmn") -> STT:
|
||||||
stt = STT(
|
stt = STT(
|
||||||
language=sm_lang,
|
language=sm_lang,
|
||||||
include_partials=True,
|
include_partials=True,
|
||||||
# Use EXTERNAL turn detection so LiveKit's own VAD handles turn
|
|
||||||
# boundaries. ADAPTIVE enables a second client-side Silero VAD inside
|
|
||||||
# the Speechmatics SDK which conflicts with LiveKit's pipeline.
|
|
||||||
turn_detection_mode=TurnDetectionMode.EXTERNAL,
|
|
||||||
)
|
)
|
||||||
# Workaround: LiveKit's LanguageCode normalizes ISO 639-3 "cmn" back to
|
# Workaround: LiveKit's LanguageCode normalizes ISO 639-3 "cmn" back to
|
||||||
# ISO 639-1 "zh", but Speechmatics expects "cmn". Override the internal
|
# ISO 639-1 "zh", but Speechmatics expects "cmn". Override the internal
|
||||||
# option after construction so the raw Speechmatics code is sent.
|
# option after construction so the raw Speechmatics code is sent.
|
||||||
stt._stt_options.language = sm_lang # type: ignore[assignment]
|
stt._stt_options.language = sm_lang # type: ignore[assignment]
|
||||||
logger.info("Speechmatics STT created: language=%s (input=%s), turn_detection=EXTERNAL", sm_lang, language)
|
logger.info("Speechmatics STT created: language=%s (input=%s)", sm_lang, language)
|
||||||
return stt
|
return stt
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue