From 1d439431101ae372910104f6b2902fe169fc5ebe Mon Sep 17 00:00:00 2001
From: hailin
Date: Mon, 2 Mar 2026 23:56:37 -0800
Subject: [PATCH] fix: correct Speechmatics STT language mapping and parameter name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Map Whisper language codes (zh→cmn, en→en, etc.) to Speechmatics codes
- Fix parameter name: enable_partials → include_partials

Co-Authored-By: Claude Opus 4.6
---
 .../src/plugins/speechmatics_stt.py | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/packages/services/voice-agent/src/plugins/speechmatics_stt.py b/packages/services/voice-agent/src/plugins/speechmatics_stt.py
index b81b886..edcf9cb 100644
--- a/packages/services/voice-agent/src/plugins/speechmatics_stt.py
+++ b/packages/services/voice-agent/src/plugins/speechmatics_stt.py
@@ -14,19 +14,31 @@ from livekit.plugins import speechmatics
 logger = logging.getLogger(__name__)
 
 
+# Map Whisper language codes to Speechmatics language codes
+_LANG_MAP = {
+    "zh": "cmn",
+    "en": "en",
+    "ja": "ja",
+    "ko": "ko",
+    "de": "de",
+    "fr": "fr",
+}
+
+
 def create_speechmatics_stt(language: str = "cmn") -> speechmatics.STT:
     """Create a Speechmatics STT instance for the voice pipeline.
 
     Args:
-        language: Speechmatics language code. Default 'cmn' for Mandarin Chinese.
-            Use 'cmn_en' for Mandarin-English bilingual, 'en' for English.
+        language: Language code (Whisper or Speechmatics). Whisper codes like
+            'zh' are automatically mapped to Speechmatics equivalents.
 
     Returns:
         Configured speechmatics.STT instance.
     """
+    sm_lang = _LANG_MAP.get(language, language)
     stt = speechmatics.STT(
-        language=language,
-        enable_partials=True,
+        language=sm_lang,
+        include_partials=True,
     )
-    logger.info("Speechmatics STT created: language=%s", language)
+    logger.info("Speechmatics STT created: language=%s (input=%s)", sm_lang, language)
     return stt