fix(agents): preserve image content blocks in context injection — fixes 209K token overflow

injectIntoMessages() was calling JSON.stringify on array content that contained image blocks, which turned the base64 data into text tokens (~170K) instead of image tokens (~1,600). Fix: append the context as a new text block in the array so the image blocks keep their original format. Also fixes token estimation to count each image as ~1,600 tokens (and each document as ~3,000 tokens) instead of by base64 character length, and adds debug logging of the token composition of each API call.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-07 08:51:24 -08:00 · 2026-02-07 08:51:24 -08:00 · fa835e4f56
parent 7dc364d9b3
commit fa835e4f56
3 changed files with 83 additions and 13 deletions
--- a/packages/services/conversation-service/src/infrastructure/agents/coordinator/agent-loop.ts
+++ b/packages/services/conversation-service/src/infrastructure/agents/coordinator/agent-loop.ts
@ -144,9 +144,32 @@ export async function* agentLoop(
  }
  // ---- Call Claude API (Streaming, with retry for rate limits) ----
-  logger.debug(
+  // Debug: log message composition to diagnose token issues
-    `[Turn ${currentTurn + 1}/${maxTurns}] Calling Claude API with ${messages.length} messages`,
+  {
-  );
+    let textChars = 0;
    let imageCount = 0;
    let docCount = 0;
    for (const msg of messages) {
      if (typeof msg.content === 'string') {
        textChars += msg.content.length;
      } else if (Array.isArray(msg.content)) {
        for (const block of msg.content as any[]) {
          if (block.type === 'text') textChars += (block.text || '').length;
          else if (block.type === 'image') imageCount++;
          else if (block.type === 'document') docCount++;
          else if (block.type === 'tool_result') textChars += String(block.content || '').length;
        }
      }
    }
    const systemChars = Array.isArray(systemPrompt)
      ? systemPrompt.reduce((sum: number, b: any) => sum + (b.text?.length || 0), 0)
      : 0;
    logger.log(
      `[Turn ${currentTurn + 1}/${maxTurns}] API call: ${messages.length} msgs, ` +
      `~${Math.round(textChars / 3)} text tokens, ${imageCount} images (~${imageCount * 1600} tokens), ` +
      `${docCount} docs, system prompt ~${Math.round(systemChars / 3)} tokens`,
    );
  }
  let stream!: ReturnType<typeof anthropicClient.messages.stream>;
  const MAX_RETRIES = 2;
--- a/packages/services/conversation-service/src/infrastructure/agents/coordinator/context-injector.service.ts
+++ b/packages/services/conversation-service/src/infrastructure/agents/coordinator/context-injector.service.ts
@ -428,14 +428,26 @@ export class ContextInjectorService {
    if (lastUserIndex >= 0) {
      const lastUser = result[lastUserIndex];
-      const originalContent = typeof lastUser.content === 'string'
+      const contextSuffix = `\n\n<system-context>\n${injectionText}\n</system-context>`;
        ? lastUser.content
        : JSON.stringify(lastUser.content);
-      result[lastUserIndex] = {
+      if (typeof lastUser.content === 'string') {
-        ...lastUser,
+        // Simple string content — append as text
-        content: `${originalContent}\n\n<system-context>\n${injectionText}\n</system-context>`,
+        result[lastUserIndex] = {
-      };
+          ...lastUser,
          content: lastUser.content + contextSuffix,
        };
      } else if (Array.isArray(lastUser.content)) {
        // Array content (has image/document blocks) — append as additional text block
        // IMPORTANT: Do NOT JSON.stringify array content, as this would turn
        // image base64 data into text tokens (~170K) instead of image tokens (~1,600)
        result[lastUserIndex] = {
          ...lastUser,
          content: [
            ...lastUser.content,
            { type: 'text' as const, text: contextSuffix },
          ],
        };
      }
    }
    return result;
@ -457,8 +469,9 @@ export class ContextInjectorService {
      return { messages, wasCompacted: false };
    }
-    // Rough token estimation: ~3 chars per token for Chinese
+    // Token estimation: strip base64 image/document data before measuring
-    const estimatedTokens = JSON.stringify(messages).length / 3;
+    // (images are counted by pixels, not by base64 text size)
    const estimatedTokens = this.estimateMessageTokens(messages);
    if (estimatedTokens < this.config.compactionThreshold) {
      return { messages, wasCompacted: false };
@ -518,6 +531,39 @@ export class ContextInjectorService {
    }
  }
  // ============================================================
  // Token Estimation
  // ============================================================
  /**
   * Estimate the token count of a message list without ever measuring
   * base64 payloads as text.
   *
   * Heuristics applied per message:
   * - string content and `text` blocks: ~3 characters per token
   * - `image` blocks: flat 1,600 tokens each
   * - `document` blocks: flat 3,000 tokens each
   * - `tool_use` blocks: serialized `input` at ~3 chars/token plus 50 tokens
   * - `tool_result` blocks: string content at ~3 chars/token; array content
   *   is walked block by block with the same text/image/document costs, so
   *   nested binary data is never stringified
   *
   * Block types not listed above contribute nothing to the estimate.
   *
   * @param messages - Messages whose content is either a string or an array
   *   of content blocks.
   * @returns The estimated total number of tokens.
   */
  private estimateMessageTokens(messages: ClaudeMessage[]): number {
    const CHARS_PER_TOKEN = 3;
    const IMAGE_TOKENS = 1600;
    const DOCUMENT_TOKENS = 3000;
    const TOOL_USE_OVERHEAD_TOKENS = 50;

    const textTokens = (text: string): number =>
      Math.ceil(text.length / CHARS_PER_TOKEN);

    // tool_result content is declared as a string, but is measured
    // defensively: String() on an array of blocks would yield
    // "[object Object],..." and String(undefined) would count as text.
    const toolResultTokens = (content: unknown): number => {
      if (typeof content === 'string') return textTokens(content);
      if (content == null) return 0;
      if (!Array.isArray(content)) return textTokens(String(content));

      let nestedTotal = 0;
      for (const item of content as unknown[]) {
        if (typeof item !== 'object' || item === null) continue;
        const nested = item as { type?: unknown; text?: unknown };
        if (nested.type === 'text' && typeof nested.text === 'string') {
          nestedTotal += textTokens(nested.text);
        } else if (nested.type === 'image') {
          nestedTotal += IMAGE_TOKENS;
        } else if (nested.type === 'document') {
          nestedTotal += DOCUMENT_TOKENS;
        }
      }
      return nestedTotal;
    };

    let tokens = 0;
    for (const msg of messages) {
      if (typeof msg.content === 'string') {
        tokens += textTokens(msg.content);
        continue;
      }
      if (!Array.isArray(msg.content)) {
        continue;
      }
      for (const block of msg.content) {
        if (block.type === 'text' && 'text' in block) {
          tokens += textTokens(block.text);
        } else if (block.type === 'image') {
          tokens += IMAGE_TOKENS; // flat cost; base64 length is irrelevant
        } else if (block.type === 'document') {
          tokens += DOCUMENT_TOKENS; // rough flat cost for a PDF document
        } else if (block.type === 'tool_result' && 'content' in block) {
          tokens += toolResultTokens(block.content);
        } else if (block.type === 'tool_use') {
          tokens +=
            textTokens(JSON.stringify(block.input || {})) + TOOL_USE_OVERHEAD_TOKENS;
        }
      }
    }
    return tokens;
  }
  // ============================================================
  // Cache Helpers
  // ============================================================
--- a/packages/services/conversation-service/src/infrastructure/agents/types/agent.types.ts
+++ b/packages/services/conversation-service/src/infrastructure/agents/types/agent.types.ts
@ -302,7 +302,8 @@ export type ContentBlock =
  | { type: 'text'; text: string }
  | { type: 'tool_use'; id: string; name: string; input: Record<string, unknown> }
  | { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean }
-  | { type: 'image'; source: { type: 'base64'; media_type: string; data: string } };
+  | { type: 'image'; source: { type: 'base64'; media_type: string; data: string } }
  | { type: 'document'; source: { type: 'base64'; media_type: string; data: string }; title?: string };
 /** Tool Definition for Claude API */
 export interface ToolDefinition {