fix: extract text from assistant message + fix event data parsing

Root causes found:
1. SDK engine emitted 'completed' but no 'text' events because
   mapSdkMessage skipped text blocks in 'assistant' messages (it assumed
   stream_event deltas would provide them, but the SDK didn't send deltas)
2. Voice pipeline read evt_data.data.content but engine events are flat
   (evt_data.content) — so even if text arrived, it was never extracted

Fixes:
- Extract text/thinking blocks from assistant messages in SDK engine
- Fix voice pipeline to read content directly from evt_data, not nested

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-24 03:01:25 -08:00
parent a7b42e6b98
commit aa2a49afd4
2 changed files with 13 additions and 9 deletions

View File

@@ -377,13 +377,17 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
}
// Ignore content_block_start/stop/message_start/message_stop — they are structural
} else if (message.type === 'assistant') {
// Complete assistant message (also emitted after streaming)
// When streaming is enabled, skip full blocks to avoid duplicate text
// Only forward tool_use and tool_result which aren't streamed as deltas
// Complete assistant message — extract ALL content blocks.
// Text/thinking blocks are emitted here as fallback since the SDK
// may not always send stream_event deltas (depends on SDK version/config).
const content = message.message?.content;
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'tool_use') {
if (block.type === 'text' && block.text) {
events.push({ type: 'text', content: block.text });
} else if (block.type === 'thinking' && block.thinking) {
events.push({ type: 'thinking', content: block.thinking });
} else if (block.type === 'tool_use') {
events.push({
type: 'tool_use',
toolName: block.name ?? 'unknown',
@@ -399,7 +403,6 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
isError: block.is_error ?? false,
});
}
// text and thinking blocks are already streamed via stream_event deltas
}
}
} else if (message.type === 'result') {

View File

@@ -328,10 +328,11 @@ class VoicePipelineTask:
elif event_type == "stream_event":
evt_data = msg.get("data", {})
evt_type = evt_data.get("type", "")
payload = evt_data.get("data", {})
# Engine events are flat: { type, content, summary, ... }
# (no nested "data" sub-field)
if evt_type == "text":
content = payload.get("content", "")
content = evt_data.get("content", "")
if content:
collected_text.append(content)
# Log first and periodic text events
@@ -340,7 +341,7 @@ class VoicePipelineTask:
print(f"[pipeline] [AGENT] Text event #{len(collected_text)}: +{len(content)} chars (total: {total_len})", flush=True)
elif evt_type == "completed":
summary = payload.get("summary", "")
summary = evt_data.get("summary", "")
if summary and not collected_text:
collected_text.append(summary)
print(f"[pipeline] [AGENT] Using summary as response: \"{summary[:100]}\"", flush=True)
@@ -349,7 +350,7 @@ class VoicePipelineTask:
break
elif evt_type == "error":
err_msg = payload.get("message", "Unknown error")
err_msg = evt_data.get("message", "Unknown error")
print(f"[pipeline] [AGENT] ERROR event: {err_msg}", flush=True)
return f"Agent 错误: {err_msg}"