fix(agents): preserve image content blocks in context injection — fixes 209K token overflow
injectIntoMessages() was JSON.stringify-ing array content (with image blocks), turning base64 data into text tokens (~170K) instead of image tokens (~1,600). Fix: append context as a new text block in the array, preserving image block format. Also fixes token estimation to count images at ~1,600 tokens instead of base64 char length, and adds debug logging for API call token composition. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7dc364d9b3
commit
fa835e4f56
|
|
@ -144,9 +144,32 @@ export async function* agentLoop(
|
|||
}
|
||||
|
||||
// ---- Call Claude API (Streaming, with retry for rate limits) ----
|
||||
logger.debug(
|
||||
`[Turn ${currentTurn + 1}/${maxTurns}] Calling Claude API with ${messages.length} messages`,
|
||||
);
|
||||
// Debug: log message composition to diagnose token issues
|
||||
{
|
||||
let textChars = 0;
|
||||
let imageCount = 0;
|
||||
let docCount = 0;
|
||||
for (const msg of messages) {
|
||||
if (typeof msg.content === 'string') {
|
||||
textChars += msg.content.length;
|
||||
} else if (Array.isArray(msg.content)) {
|
||||
for (const block of msg.content as any[]) {
|
||||
if (block.type === 'text') textChars += (block.text || '').length;
|
||||
else if (block.type === 'image') imageCount++;
|
||||
else if (block.type === 'document') docCount++;
|
||||
else if (block.type === 'tool_result') textChars += String(block.content || '').length;
|
||||
}
|
||||
}
|
||||
}
|
||||
const systemChars = Array.isArray(systemPrompt)
|
||||
? systemPrompt.reduce((sum: number, b: any) => sum + (b.text?.length || 0), 0)
|
||||
: 0;
|
||||
logger.log(
|
||||
`[Turn ${currentTurn + 1}/${maxTurns}] API call: ${messages.length} msgs, ` +
|
||||
`~${Math.round(textChars / 3)} text tokens, ${imageCount} images (~${imageCount * 1600} tokens), ` +
|
||||
`${docCount} docs, system prompt ~${Math.round(systemChars / 3)} tokens`,
|
||||
);
|
||||
}
|
||||
|
||||
let stream!: ReturnType<typeof anthropicClient.messages.stream>;
|
||||
const MAX_RETRIES = 2;
|
||||
|
|
|
|||
|
|
@ -428,14 +428,26 @@ export class ContextInjectorService {
|
|||
|
||||
if (lastUserIndex >= 0) {
|
||||
const lastUser = result[lastUserIndex];
|
||||
const originalContent = typeof lastUser.content === 'string'
|
||||
? lastUser.content
|
||||
: JSON.stringify(lastUser.content);
|
||||
const contextSuffix = `\n\n<system-context>\n${injectionText}\n</system-context>`;
|
||||
|
||||
result[lastUserIndex] = {
|
||||
...lastUser,
|
||||
content: `${originalContent}\n\n<system-context>\n${injectionText}\n</system-context>`,
|
||||
};
|
||||
if (typeof lastUser.content === 'string') {
|
||||
// Simple string content — append as text
|
||||
result[lastUserIndex] = {
|
||||
...lastUser,
|
||||
content: lastUser.content + contextSuffix,
|
||||
};
|
||||
} else if (Array.isArray(lastUser.content)) {
|
||||
// Array content (has image/document blocks) — append as additional text block
|
||||
// IMPORTANT: Do NOT JSON.stringify array content, as this would turn
|
||||
// image base64 data into text tokens (~170K) instead of image tokens (~1,600)
|
||||
result[lastUserIndex] = {
|
||||
...lastUser,
|
||||
content: [
|
||||
...lastUser.content,
|
||||
{ type: 'text' as const, text: contextSuffix },
|
||||
],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
@ -457,8 +469,9 @@ export class ContextInjectorService {
|
|||
return { messages, wasCompacted: false };
|
||||
}
|
||||
|
||||
// Rough token estimation: ~3 chars per token for Chinese
|
||||
const estimatedTokens = JSON.stringify(messages).length / 3;
|
||||
// Token estimation: strip base64 image/document data before measuring
|
||||
// (images are counted by pixels, not by base64 text size)
|
||||
const estimatedTokens = this.estimateMessageTokens(messages);
|
||||
|
||||
if (estimatedTokens < this.config.compactionThreshold) {
|
||||
return { messages, wasCompacted: false };
|
||||
|
|
@ -518,6 +531,39 @@ export class ContextInjectorService {
|
|||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Token Estimation
|
||||
// ============================================================
|
||||
|
||||
/**
 * Roughly estimates how many tokens a list of messages will consume.
 *
 * Base64 payloads are never measured as text; the estimate is built per block:
 * - plain string content and `text` blocks: ~3 characters per token
 * - `image` blocks: a flat 1,600 tokens each
 * - `document` blocks: a flat 3,000 tokens each
 * - `tool_result` blocks: stringified `content` at ~3 characters per token
 * - `tool_use` blocks: JSON-serialized `input` at ~3 characters per token, plus 50
 *
 * Blocks of any other type, and message content that is neither a string nor
 * an array, contribute nothing.
 *
 * @param messages - Messages whose content should be measured.
 * @returns The summed estimate; every piece is rounded up individually.
 */
private estimateMessageTokens(messages: ClaudeMessage[]): number {
  const CHARS_PER_TOKEN = 3;
  const charsToTokens = (charCount: number): number => Math.ceil(charCount / CHARS_PER_TOKEN);

  let total = 0;

  for (const message of messages) {
    const content = message.content;

    // Plain string content is measured directly.
    if (typeof content === 'string') {
      total += charsToTokens(content.length);
      continue;
    }

    if (!Array.isArray(content)) {
      continue;
    }

    for (const block of content) {
      switch (block.type) {
        case 'text':
          if ('text' in block) {
            total += charsToTokens(block.text.length);
          }
          break;
        case 'image':
          // Flat per-image figure; the length of the base64 payload is deliberately ignored.
          total += 1600;
          break;
        case 'document':
          // Flat approximation used for PDF documents.
          total += 3000;
          break;
        case 'tool_result':
          if ('content' in block) {
            total += charsToTokens(String(block.content).length);
          }
          break;
        case 'tool_use':
          // Serialized arguments plus a fixed 50-token allowance for the call itself.
          total += charsToTokens(JSON.stringify(block.input || {}).length) + 50;
          break;
        default:
          // Unknown block types are not counted.
          break;
      }
    }
  }

  return total;
}
|
||||
|
||||
// ============================================================
|
||||
// Cache Helpers
|
||||
// ============================================================
|
||||
|
|
|
|||
|
|
@ -302,7 +302,8 @@ export type ContentBlock =
|
|||
| { type: 'text'; text: string }
|
||||
| { type: 'tool_use'; id: string; name: string; input: Record<string, unknown> }
|
||||
| { type: 'tool_result'; tool_use_id: string; content: string; is_error?: boolean }
|
||||
| { type: 'image'; source: { type: 'base64'; media_type: string; data: string } };
|
||||
| { type: 'image'; source: { type: 'base64'; media_type: string; data: string } }
|
||||
| { type: 'document'; source: { type: 'base64'; media_type: string; data: string }; title?: string };
|
||||
|
||||
/** Tool Definition for Claude API */
|
||||
export interface ToolDefinition {
|
||||
|
|
|
|||
Loading…
Reference in New Issue