From 740f8f5f88370ad1fd04888c2ad417a21cbcac49 Mon Sep 17 00:00:00 2001
From: hailin <hailin.zhao@gdzx.xyz>
Date: Tue, 24 Feb 2026 05:03:05 -0800
Subject: [PATCH] fix: sentence splitting bug in voice pipeline TTS streaming

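When the first punctuation mark appeared before _MIN_SENTENCE_LEN chars,
every regex search restarted at the beginning of the buffer, found that
same early match again, and broke out of the loop, so no later sentence
boundary was ever reached and all subsequent sentence splits were
permanently blocked. Fix by advancing search_start past short matches
and continuing the search instead of breaking out of the loop.

Also pass engineType "claude_api" when creating the agent task (use the
claude_api engine for streaming TTS).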

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../src/pipeline/base_pipeline.py             | 35 +++++++++++--------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/packages/services/voice-service/src/pipeline/base_pipeline.py b/packages/services/voice-service/src/pipeline/base_pipeline.py
index 725406a..66f3bdc 100644
--- a/packages/services/voice-service/src/pipeline/base_pipeline.py
+++ b/packages/services/voice-service/src/pipeline/base_pipeline.py
@@ -219,21 +219,26 @@ class VoicePipelineTask:
                 sentence_buf += chunk
 
                 # Check for sentence boundaries
+                search_start = 0
                 while True:
-                    match = _SENTENCE_END_RE.search(sentence_buf)
-                    if match and match.end() >= _MIN_SENTENCE_LEN:
-                        sentence = sentence_buf[:match.end()].strip()
-                        sentence_buf = sentence_buf[match.end():]
-                        if sentence:
-                            tts_count += 1
-                            if first_audio_ms is None:
-                                first_audio_ms = int((time.time() - t1) * 1000)
-                                print(f"[pipeline] [TTS] First sentence ready at {first_audio_ms}ms: \"{sentence[:60]}\"", flush=True)
-                            else:
-                                print(f"[pipeline] [TTS] Sentence #{tts_count}: \"{sentence[:60]}\"", flush=True)
-                            await self._synthesize_chunk(sentence)
-                    else:
+                    match = _SENTENCE_END_RE.search(sentence_buf, search_start)
+                    if not match:
                         break
+                    # Skip matches that are too early — sentence too short
+                    if match.end() < _MIN_SENTENCE_LEN:
+                        search_start = match.end()
+                        continue
+                    sentence = sentence_buf[:match.end()].strip()
+                    sentence_buf = sentence_buf[match.end():]
+                    search_start = 0
+                    if sentence:
+                        tts_count += 1
+                        if first_audio_ms is None:
+                            first_audio_ms = int((time.time() - t1) * 1000)
+                            print(f"[pipeline] [TTS] First sentence ready at {first_audio_ms}ms: \"{sentence[:60]}\"", flush=True)
+                        else:
+                            print(f"[pipeline] [TTS] Sentence #{tts_count}: \"{sentence[:60]}\"", flush=True)
+                        await self._synthesize_chunk(sentence)
 
             # Flush remaining buffer
             remaining = sentence_buf.strip()
@@ -312,8 +317,8 @@ class VoicePipelineTask:
                     }))
                     print(f"[pipeline] [AGENT] Pre-subscribed session={pre_session_id}", flush=True)
 
-                # 2. Create agent task
-                body = {"prompt": user_text}
+                # 2. Create agent task (use claude_api engine for streaming TTS)
+                body = {"prompt": user_text, "engineType": "claude_api"}
                 if self._agent_session_id:
                     body["sessionId"] = self._agent_session_id