debug: add verbose logging to Speechmatics monkey-patch

Trace _patched_process_audio lifecycle and FlushSentinel handling
to diagnose why final transcripts are not being promoted.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-03-03 02:50:04 -08:00
parent 1431dc0c83
commit de3eccafd0
1 changed file with 11 additions and 3 deletions

View File

def _patched_handle_partial_segment(self: SpeechStream, message: dict) -> None:
    """Remember the latest partial segments, then defer to the original handler.

    When ``message`` carries a non-empty ``"segments"`` list, a short preview
    of the segment texts is logged and the list is kept on the stream as
    ``_sm_last_partial_segments`` so it can be promoted when a flush arrives.
    The original handler is always invoked afterwards with the same message.
    """
    partials = message.get("segments", [])
    if partials:
        # Join every segment's text into one line; only the first 100
        # characters are logged to keep the output readable.
        preview = " | ".join(seg.get("text", "") for seg in partials)
        logger.info(
            "[SM-PATCH] stashing %d partial segment(s): %s",
            len(partials),
            preview[:100],
        )
        self._sm_last_partial_segments = partials  # type: ignore[attr-defined]
    _original_handle_partial_segment(self, message)
@ -47,35 +49,41 @@ _original_process_audio = SpeechStream._process_audio
async def _patched_process_audio(self: SpeechStream) -> None:  # type: ignore[override]
    """Forward input audio to the client and promote partials on flush.

    Items are read from ``self._input_ch``. Audio items are re-chunked through
    an ``AudioByteStream`` and each resulting frame is sent with
    ``self._client.send_audio`` (when a client is set), accumulating
    ``self._speech_duration``. When a ``_FlushSentinel`` item arrives, the
    byte stream is flushed and any partial segments stored in
    ``self._sm_last_partial_segments`` are emitted through
    ``self._send_frames(..., is_final=True)``; the store is then cleared.

    Each stage is logged with the ``[SM-PATCH]`` prefix. Cancellation is
    logged and swallowed; any other exception is logged with its traceback
    and swallowed as well.
    """
    logger.info("[SM-PATCH] _patched_process_audio STARTED")
    self._sm_last_partial_segments: list = []  # type: ignore[attr-defined]
    # Initialised before the ``try`` so the cancellation handler can report
    # it directly instead of probing ``dir()`` for the local name.
    frame_count = 0
    try:
        audio_bstream = utils.audio.AudioByteStream(
            sample_rate=self._stt._sample_rate,
            num_channels=1,
        )
        async for data in self._input_ch:
            if isinstance(data, self._FlushSentinel):
                logger.info(
                    "[SM-PATCH] FlushSentinel received (after %d frames)",
                    frame_count,
                )
                frames = audio_bstream.flush()
                # Promote stored partials → FINAL_TRANSCRIPT immediately
                stored = getattr(self, "_sm_last_partial_segments", [])
                if stored:
                    logger.info(
                        "[SM-PATCH] promoting %d partial segment(s) to FINAL",
                        len(stored),
                    )
                    self._send_frames(stored, is_final=True)
                    self._sm_last_partial_segments = []  # type: ignore[attr-defined]
                else:
                    logger.warning(
                        "[SM-PATCH] FlushSentinel but no partial segments stored"
                    )
            else:
                frames = audio_bstream.write(data.data.tobytes())
                frame_count += 1
            if self._client:
                for frame in frames:
                    self._speech_duration += frame.duration
                    await self._client.send_audio(frame.data.tobytes())
    except asyncio.CancelledError:
        logger.info(
            "[SM-PATCH] _patched_process_audio cancelled (processed %d frames)",
            frame_count,
        )
    except Exception as e:
        logger.error("[SM-PATCH] _patched_process_audio ERROR: %s", e, exc_info=True)
# Install the monkey-patch: SpeechStream._process_audio now refers to _patched_process_audio.
SpeechStream._process_audio = _patched_process_audio # type: ignore[assignment]