fix: correct Speechmatics STT language mapping and parameter name

- Map Whisper language codes (zh→cmn, en→en, etc.) to Speechmatics codes
- Fix parameter name: enable_partials → include_partials

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-03-02 23:56:37 -08:00
parent e32a3a9800
commit 1d43943110
1 changed file with 17 additions and 5 deletions

View File

@ -14,19 +14,31 @@ from livekit.plugins import speechmatics
logger = logging.getLogger(__name__)

# Lookup table: Whisper language code -> Speechmatics language code.
_LANG_MAP = dict(
    zh="cmn",
    en="en",
    ja="ja",
    ko="ko",
    de="de",
    fr="fr",
)
def create_speechmatics_stt(language: str = "cmn") -> speechmatics.STT:
    """Create a Speechmatics STT instance for the voice pipeline.

    Args:
        language: Language code (Whisper or Speechmatics). Whisper codes like
            'zh' are automatically mapped to Speechmatics equivalents; any
            code not present in ``_LANG_MAP`` is forwarded unchanged.
            Default 'cmn' for Mandarin Chinese. Use 'cmn_en' for
            Mandarin-English bilingual, 'en' for English.

    Returns:
        Configured speechmatics.STT instance.
    """
    # Fall back to the caller's value so native Speechmatics codes
    # (e.g. 'cmn', 'cmn_en') pass through untouched.
    sm_lang = _LANG_MAP.get(language, language)
    stt = speechmatics.STT(
        language=sm_lang,
        include_partials=True,
    )
    # Log both the resolved code and the raw input to make mapping issues
    # easy to spot.
    logger.info(
        "Speechmatics STT created: language=%s (input=%s)", sm_lang, language
    )
    return stt