fix: extract text from assistant message + fix event data parsing

Root causes found:
1. SDK engine emitted 'completed' but no 'text' events because
   mapSdkMessage skipped text blocks in 'assistant' messages (it assumed
   stream_event deltas would provide them, but the SDK didn't send deltas)
2. Voice pipeline read evt_data.data.content but engine events are flat
   (evt_data.content) — so even if text arrived, it was never extracted

Fixes:
- Extract text/thinking blocks from assistant messages in SDK engine
- Fix voice pipeline to read content directly from evt_data, not nested

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-24 03:01:25 -08:00
parent a7b42e6b98
commit aa2a49afd4
2 changed files with 13 additions and 9 deletions

View File

@@ -377,13 +377,17 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
}
// Ignore content_block_start/stop/message_start/message_stop — they are structural
} else if (message.type === 'assistant') {
// Complete assistant message (also emitted after streaming)
// When streaming is enabled, skip full blocks to avoid duplicate text
// Only forward tool_use and tool_result which aren't streamed as deltas
// Complete assistant message — extract ALL content blocks.
// Text/thinking blocks are emitted here as fallback since the SDK
// may not always send stream_event deltas (depends on SDK version/config).
const content = message.message?.content;
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'tool_use') {
if (block.type === 'text' && block.text) {
events.push({ type: 'text', content: block.text });
} else if (block.type === 'thinking' && block.thinking) {
events.push({ type: 'thinking', content: block.thinking });
} else if (block.type === 'tool_use') {
events.push({
type: 'tool_use',
toolName: block.name ?? 'unknown',
@@ -399,7 +403,6 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
isError: block.is_error ?? false,
});
}
// text and thinking blocks are already streamed via stream_event deltas
}
}
} else if (message.type === 'result') {

View File

@@ -328,10 +328,11 @@ class VoicePipelineTask:
elif event_type == "stream_event":
evt_data = msg.get("data", {})
evt_type = evt_data.get("type", "")
payload = evt_data.get("data", {})
# Engine events are flat: { type, content, summary, ... }
# (no nested "data" sub-field)
if evt_type == "text":
content = payload.get("content", "")
content = evt_data.get("content", "")
if content:
collected_text.append(content)
# Log first and periodic text events
@@ -340,7 +341,7 @@ class VoicePipelineTask:
print(f"[pipeline] [AGENT] Text event #{len(collected_text)}: +{len(content)} chars (total: {total_len})", flush=True)
elif evt_type == "completed":
summary = payload.get("summary", "")
summary = evt_data.get("summary", "")
if summary and not collected_text:
collected_text.append(summary)
print(f"[pipeline] [AGENT] Using summary as response: \"{summary[:100]}\"", flush=True)
@@ -349,7 +350,7 @@ class VoicePipelineTask:
break
elif evt_type == "error":
err_msg = payload.get("message", "Unknown error")
err_msg = evt_data.get("message", "Unknown error")
print(f"[pipeline] [AGENT] ERROR event: {err_msg}", flush=True)
return f"Agent 错误: {err_msg}"