feat: add detailed logging to voice pipeline (STT/Agent/TTS timing)
Log timestamps, content, and event details at each pipeline stage to help diagnose voice-agent integration issues. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1d5c834dfe
commit
0dbe711ed3
|
|
@ -173,19 +173,20 @@ class VoicePipelineTask:
|
||||||
"""Transcribe speech, generate LLM response, synthesize and send TTS."""
|
"""Transcribe speech, generate LLM response, synthesize and send TTS."""
|
||||||
session_id = self.session_context.get("session_id", "?")
|
session_id = self.session_context.get("session_id", "?")
|
||||||
audio_secs = len(audio_data) / (_SAMPLE_RATE * _BYTES_PER_SAMPLE)
|
audio_secs = len(audio_data) / (_SAMPLE_RATE * _BYTES_PER_SAMPLE)
|
||||||
print(f"[pipeline] Processing speech: {audio_secs:.1f}s of audio", flush=True)
|
print(f"[pipeline] ===== Processing speech: {audio_secs:.1f}s of audio, session={session_id} =====", flush=True)
|
||||||
|
|
||||||
# 1. STT
|
# 1. STT
|
||||||
|
t0 = time.time()
|
||||||
text = await self._transcribe(audio_data)
|
text = await self._transcribe(audio_data)
|
||||||
|
stt_ms = int((time.time() - t0) * 1000)
|
||||||
if not text or not text.strip():
|
if not text or not text.strip():
|
||||||
print(f"[pipeline] STT returned empty text, skipping", flush=True)
|
print(f"[pipeline] STT returned empty text (took {stt_ms}ms), skipping", flush=True)
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"[pipeline] User said: {text.strip()}", flush=True)
|
print(f"[pipeline] [STT] ({stt_ms}ms) User said: \"{text.strip()}\"", flush=True)
|
||||||
|
|
||||||
# Notify client that we heard them
|
# Notify client that we heard them
|
||||||
try:
|
try:
|
||||||
import json
|
|
||||||
await self.websocket.send_text(
|
await self.websocket.send_text(
|
||||||
json.dumps({"type": "transcript", "text": text.strip(), "role": "user"})
|
json.dumps({"type": "transcript", "text": text.strip(), "role": "user"})
|
||||||
)
|
)
|
||||||
|
|
@ -193,12 +194,16 @@ class VoicePipelineTask:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# 2. Agent service — create task + subscribe for response
|
# 2. Agent service — create task + subscribe for response
|
||||||
|
print(f"[pipeline] [AGENT] Sending to agent: \"{text.strip()}\"", flush=True)
|
||||||
|
t1 = time.time()
|
||||||
response_text = await self._agent_generate(text.strip())
|
response_text = await self._agent_generate(text.strip())
|
||||||
|
agent_ms = int((time.time() - t1) * 1000)
|
||||||
|
|
||||||
if not response_text:
|
if not response_text:
|
||||||
print(f"[pipeline] Agent returned empty response", flush=True)
|
print(f"[pipeline] [AGENT] ({agent_ms}ms) Agent returned empty response!", flush=True)
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"[pipeline] Agent says: {response_text[:100]}", flush=True)
|
print(f"[pipeline] [AGENT] ({agent_ms}ms) Agent response ({len(response_text)} chars): \"{response_text[:200]}\"", flush=True)
|
||||||
|
|
||||||
# Notify client of the response text
|
# Notify client of the response text
|
||||||
try:
|
try:
|
||||||
|
|
@ -209,7 +214,12 @@ class VoicePipelineTask:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# 3. TTS → send audio back
|
# 3. TTS → send audio back
|
||||||
|
print(f"[pipeline] [TTS] Synthesizing {len(response_text)} chars...", flush=True)
|
||||||
|
t2 = time.time()
|
||||||
await self._synthesize_and_send(response_text)
|
await self._synthesize_and_send(response_text)
|
||||||
|
tts_ms = int((time.time() - t2) * 1000)
|
||||||
|
print(f"[pipeline] [TTS] ({tts_ms}ms) Audio sent to client", flush=True)
|
||||||
|
print(f"[pipeline] ===== Turn complete: STT={stt_ms}ms + Agent={agent_ms}ms + TTS={tts_ms}ms = {stt_ms+agent_ms+tts_ms}ms =====", flush=True)
|
||||||
|
|
||||||
async def _transcribe(self, audio_data: bytes) -> str:
|
async def _transcribe(self, audio_data: bytes) -> str:
|
||||||
"""Transcribe audio using STT service."""
|
"""Transcribe audio using STT service."""
|
||||||
|
|
@ -240,11 +250,14 @@ class VoicePipelineTask:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
collected_text = []
|
collected_text = []
|
||||||
|
event_count = 0
|
||||||
timeout_secs = 120 # Max wait for agent response
|
timeout_secs = 120 # Max wait for agent response
|
||||||
|
|
||||||
|
print(f"[pipeline] [AGENT] Connecting WS: {ws_url}", flush=True)
|
||||||
async with websockets.connect(ws_url) as ws:
|
async with websockets.connect(ws_url) as ws:
|
||||||
|
print(f"[pipeline] [AGENT] WS connected", flush=True)
|
||||||
|
|
||||||
# 1. Subscribe FIRST (before creating task to avoid missing events)
|
# 1. Subscribe FIRST (before creating task to avoid missing events)
|
||||||
# Use existing session ID for subscription; if none, we'll re-subscribe after task creation
|
|
||||||
pre_session_id = self._agent_session_id or ""
|
pre_session_id = self._agent_session_id or ""
|
||||||
|
|
||||||
if pre_session_id:
|
if pre_session_id:
|
||||||
|
|
@ -253,29 +266,30 @@ class VoicePipelineTask:
|
||||||
"data": {"sessionId": pre_session_id},
|
"data": {"sessionId": pre_session_id},
|
||||||
})
|
})
|
||||||
await ws.send(subscribe_msg)
|
await ws.send(subscribe_msg)
|
||||||
print(f"[pipeline] Pre-subscribed to agent WS session={pre_session_id}", flush=True)
|
print(f"[pipeline] [AGENT] Pre-subscribed session={pre_session_id}", flush=True)
|
||||||
|
|
||||||
# 2. Create agent task
|
# 2. Create agent task
|
||||||
body = {"prompt": user_text}
|
body = {"prompt": user_text}
|
||||||
if self._agent_session_id:
|
if self._agent_session_id:
|
||||||
body["sessionId"] = self._agent_session_id
|
body["sessionId"] = self._agent_session_id
|
||||||
|
|
||||||
print(f"[pipeline] Creating agent task: {user_text[:60]}", flush=True)
|
print(f"[pipeline] [AGENT] POST /tasks prompt=\"{user_text[:80]}\"", flush=True)
|
||||||
async with httpx.AsyncClient(timeout=30) as client:
|
async with httpx.AsyncClient(timeout=30) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
f"{agent_url}/api/v1/agent/tasks",
|
f"{agent_url}/api/v1/agent/tasks",
|
||||||
json=body,
|
json=body,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
)
|
)
|
||||||
|
print(f"[pipeline] [AGENT] POST response: {resp.status_code}", flush=True)
|
||||||
if resp.status_code != 200 and resp.status_code != 201:
|
if resp.status_code != 200 and resp.status_code != 201:
|
||||||
print(f"[pipeline] Agent task creation failed: {resp.status_code} {resp.text}", flush=True)
|
print(f"[pipeline] [AGENT] Task creation FAILED: {resp.status_code} {resp.text[:200]}", flush=True)
|
||||||
return "抱歉,Agent服务暂时不可用。"
|
return "抱歉,Agent服务暂时不可用。"
|
||||||
|
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
session_id = data.get("sessionId", "")
|
session_id = data.get("sessionId", "")
|
||||||
task_id = data.get("taskId", "")
|
task_id = data.get("taskId", "")
|
||||||
self._agent_session_id = session_id
|
self._agent_session_id = session_id
|
||||||
print(f"[pipeline] Agent task created: session={session_id}, task={task_id}", flush=True)
|
print(f"[pipeline] [AGENT] Task created: session={session_id}, task={task_id}", flush=True)
|
||||||
|
|
||||||
# 3. Subscribe with actual session/task IDs (covers first-time case)
|
# 3. Subscribe with actual session/task IDs (covers first-time case)
|
||||||
subscribe_msg = json.dumps({
|
subscribe_msg = json.dumps({
|
||||||
|
|
@ -283,26 +297,35 @@ class VoicePipelineTask:
|
||||||
"data": {"sessionId": session_id, "taskId": task_id},
|
"data": {"sessionId": session_id, "taskId": task_id},
|
||||||
})
|
})
|
||||||
await ws.send(subscribe_msg)
|
await ws.send(subscribe_msg)
|
||||||
print(f"[pipeline] Subscribed to agent WS session={session_id}, task={task_id}", flush=True)
|
print(f"[pipeline] [AGENT] Subscribed session={session_id}, task={task_id}", flush=True)
|
||||||
|
|
||||||
# 4. Collect events until completed
|
# 4. Collect events until completed
|
||||||
deadline = time.time() + timeout_secs
|
deadline = time.time() + timeout_secs
|
||||||
while time.time() < deadline:
|
while time.time() < deadline:
|
||||||
|
remaining = deadline - time.time()
|
||||||
try:
|
try:
|
||||||
raw = await asyncio.wait_for(ws.recv(), timeout=5.0)
|
raw = await asyncio.wait_for(ws.recv(), timeout=min(5.0, remaining))
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
|
if time.time() >= deadline:
|
||||||
|
print(f"[pipeline] [AGENT] TIMEOUT after {timeout_secs}s waiting for events", flush=True)
|
||||||
continue
|
continue
|
||||||
except Exception:
|
except Exception as ws_err:
|
||||||
|
print(f"[pipeline] [AGENT] WS recv error: {type(ws_err).__name__}: {ws_err}", flush=True)
|
||||||
break
|
break
|
||||||
|
|
||||||
try:
|
try:
|
||||||
msg = json.loads(raw)
|
msg = json.loads(raw)
|
||||||
except (json.JSONDecodeError, TypeError):
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
print(f"[pipeline] [AGENT] Non-JSON WS message: {str(raw)[:100]}", flush=True)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
event_type = msg.get("event", "")
|
event_type = msg.get("event", "")
|
||||||
|
event_count += 1
|
||||||
|
|
||||||
if event_type == "stream_event":
|
if event_type == "subscribed":
|
||||||
|
print(f"[pipeline] [AGENT] Subscription confirmed: {msg.get('data', {})}", flush=True)
|
||||||
|
|
||||||
|
elif event_type == "stream_event":
|
||||||
evt_data = msg.get("data", {})
|
evt_data = msg.get("data", {})
|
||||||
evt_type = evt_data.get("type", "")
|
evt_type = evt_data.get("type", "")
|
||||||
payload = evt_data.get("data", {})
|
payload = evt_data.get("data", {})
|
||||||
|
|
@ -311,21 +334,34 @@ class VoicePipelineTask:
|
||||||
content = payload.get("content", "")
|
content = payload.get("content", "")
|
||||||
if content:
|
if content:
|
||||||
collected_text.append(content)
|
collected_text.append(content)
|
||||||
|
# Log first and periodic text events
|
||||||
|
if len(collected_text) <= 3 or len(collected_text) % 10 == 0:
|
||||||
|
total_len = sum(len(t) for t in collected_text)
|
||||||
|
print(f"[pipeline] [AGENT] Text event #{len(collected_text)}: +{len(content)} chars (total: {total_len})", flush=True)
|
||||||
|
|
||||||
elif evt_type == "completed":
|
elif evt_type == "completed":
|
||||||
summary = payload.get("summary", "")
|
summary = payload.get("summary", "")
|
||||||
if summary and not collected_text:
|
if summary and not collected_text:
|
||||||
collected_text.append(summary)
|
collected_text.append(summary)
|
||||||
print(f"[pipeline] Agent completed", flush=True)
|
print(f"[pipeline] [AGENT] Using summary as response: \"{summary[:100]}\"", flush=True)
|
||||||
|
total_chars = sum(len(t) for t in collected_text)
|
||||||
|
print(f"[pipeline] [AGENT] Completed! {len(collected_text)} text events, {total_chars} chars total, {event_count} WS events received", flush=True)
|
||||||
break
|
break
|
||||||
|
|
||||||
elif evt_type == "error":
|
elif evt_type == "error":
|
||||||
err_msg = payload.get("message", "Unknown error")
|
err_msg = payload.get("message", "Unknown error")
|
||||||
print(f"[pipeline] Agent error: {err_msg}", flush=True)
|
print(f"[pipeline] [AGENT] ERROR event: {err_msg}", flush=True)
|
||||||
return f"Agent 错误: {err_msg}"
|
return f"Agent 错误: {err_msg}"
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"[pipeline] [AGENT] Stream event type={evt_type}", flush=True)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"[pipeline] [AGENT] WS event: {event_type}", flush=True)
|
||||||
|
|
||||||
result = "".join(collected_text).strip()
|
result = "".join(collected_text).strip()
|
||||||
if not result:
|
if not result:
|
||||||
|
print(f"[pipeline] [AGENT] WARNING: No text collected after {event_count} events!", flush=True)
|
||||||
return "Agent 未返回回复。"
|
return "Agent 未返回回复。"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue