diff --git a/packages/services/voice-agent/src/plugins/speechmatics_stt.py b/packages/services/voice-agent/src/plugins/speechmatics_stt.py
index b9a5aef..80785c2 100644
--- a/packages/services/voice-agent/src/plugins/speechmatics_stt.py
+++ b/packages/services/voice-agent/src/plugins/speechmatics_stt.py
@@ -39,6 +39,8 @@ def _patched_handle_partial_segment(self: SpeechStream, message: dict) -> None:
     """Intercept partial segments and stash them for flush-time promotion."""
     segments = message.get("segments", [])
     if segments:
+        text = " | ".join(s.get("text", "") for s in segments)
+        logger.info("[SM-PATCH] stashing %d partial segment(s): %s", len(segments), text[:100])
         self._sm_last_partial_segments = segments  # type: ignore[attr-defined]
     _original_handle_partial_segment(self, message)
 
@@ -47,35 +49,44 @@ _original_process_audio = SpeechStream._process_audio
 
 
 async def _patched_process_audio(self: SpeechStream) -> None:  # type: ignore[override]
+    logger.info("[SM-PATCH] _patched_process_audio STARTED")
     self._sm_last_partial_segments: list = []  # type: ignore[attr-defined]
+    # Initialised before the try so the cancellation handler can always read it.
+    frame_count = 0
     try:
         audio_bstream = utils.audio.AudioByteStream(
             sample_rate=self._stt._sample_rate,
             num_channels=1,
         )
         async for data in self._input_ch:
             if isinstance(data, self._FlushSentinel):
+                logger.info("[SM-PATCH] FlushSentinel received (after %d frames)", frame_count)
                 frames = audio_bstream.flush()
                 # Promote stored partials → FINAL_TRANSCRIPT immediately
                 stored = getattr(self, "_sm_last_partial_segments", [])
                 if stored:
                     logger.info(
-                        "FlushSentinel: promoting %d partial segment(s) to FINAL",
+                        "[SM-PATCH] promoting %d partial segment(s) to FINAL",
                         len(stored),
                     )
                     self._send_frames(stored, is_final=True)
                     self._sm_last_partial_segments = []  # type: ignore[attr-defined]
                 else:
-                    logger.warning("FlushSentinel received but no partial segments stored")
+                    logger.warning("[SM-PATCH] FlushSentinel but no partial segments stored")
             else:
                 frames = audio_bstream.write(data.data.tobytes())
+                frame_count += 1
 
             if self._client:
                 for frame in frames:
                     self._speech_duration += frame.duration
                     await self._client.send_audio(frame.data.tobytes())
     except asyncio.CancelledError:
-        pass
+        logger.info("[SM-PATCH] _patched_process_audio cancelled (processed %d frames)", frame_count)
+    except Exception as e:
+        # Log for diagnostics, then re-raise so upstream error handling still sees the failure.
+        logger.exception("[SM-PATCH] _patched_process_audio ERROR: %s", e)
+        raise
 
 
 SpeechStream._process_audio = _patched_process_audio  # type: ignore[assignment]