From 740f8f5f88370ad1fd04888c2ad417a21cbcac49 Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 24 Feb 2026 05:03:05 -0800 Subject: [PATCH] fix: sentence splitting bug in voice pipeline TTS streaming When the first punctuation mark appeared before _MIN_SENTENCE_LEN chars, the regex search would always find it first and skip it, permanently blocking all subsequent sentence splits. Fix by advancing search_start past short matches instead of breaking out of the loop. Co-Authored-By: Claude Opus 4.6 --- .../src/pipeline/base_pipeline.py | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/packages/services/voice-service/src/pipeline/base_pipeline.py b/packages/services/voice-service/src/pipeline/base_pipeline.py index 725406a..66f3bdc 100644 --- a/packages/services/voice-service/src/pipeline/base_pipeline.py +++ b/packages/services/voice-service/src/pipeline/base_pipeline.py @@ -219,21 +219,26 @@ class VoicePipelineTask: sentence_buf += chunk # Check for sentence boundaries + search_start = 0 while True: - match = _SENTENCE_END_RE.search(sentence_buf) - if match and match.end() >= _MIN_SENTENCE_LEN: - sentence = sentence_buf[:match.end()].strip() - sentence_buf = sentence_buf[match.end():] - if sentence: - tts_count += 1 - if first_audio_ms is None: - first_audio_ms = int((time.time() - t1) * 1000) - print(f"[pipeline] [TTS] First sentence ready at {first_audio_ms}ms: \"{sentence[:60]}\"", flush=True) - else: - print(f"[pipeline] [TTS] Sentence #{tts_count}: \"{sentence[:60]}\"", flush=True) - await self._synthesize_chunk(sentence) - else: + match = _SENTENCE_END_RE.search(sentence_buf, search_start) + if not match: break + # Skip matches that are too early — sentence too short + if match.end() < _MIN_SENTENCE_LEN: + search_start = match.end() + continue + sentence = sentence_buf[:match.end()].strip() + sentence_buf = sentence_buf[match.end():] + search_start = 0 + if sentence: + tts_count += 1 + if first_audio_ms is None: + first_audio_ms = int((time.time() - t1) * 1000) + print(f"[pipeline] [TTS] First sentence ready at {first_audio_ms}ms: \"{sentence[:60]}\"", flush=True) + else: + print(f"[pipeline] [TTS] Sentence #{tts_count}: \"{sentence[:60]}\"", flush=True) + await self._synthesize_chunk(sentence) # Flush remaining buffer remaining = sentence_buf.strip() @@ -312,8 +317,8 @@ class VoicePipelineTask: })) print(f"[pipeline] [AGENT] Pre-subscribed session={pre_session_id}", flush=True) - # 2. Create agent task - body = {"prompt": user_text} + # 2. Create agent task (use claude_api engine for streaming TTS) + body = {"prompt": user_text, "engineType": "claude_api"} if self._agent_session_id: body["sessionId"] = self._agent_session_id