diff --git a/packages/services/agent-service/src/domain/services/conversation-context.service.ts b/packages/services/agent-service/src/domain/services/conversation-context.service.ts index 620d998..f6cb9b2 100644 --- a/packages/services/agent-service/src/domain/services/conversation-context.service.ts +++ b/packages/services/agent-service/src/domain/services/conversation-context.service.ts @@ -87,17 +87,30 @@ export class ConversationContextService { for (const msg of messages) { if (msg.role === 'user') { if (msg.attachments && msg.attachments.length > 0) { - // Build multimodal content blocks for messages with images + // Build multimodal content blocks for messages with attachments const contentBlocks: any[] = []; for (const att of msg.attachments) { - contentBlocks.push({ - type: 'image', - source: { - type: 'base64', - media_type: att.mediaType, - data: att.base64Data, - }, - }); + if (att.mediaType === 'application/pdf') { + // PDF → Anthropic document content block + contentBlocks.push({ + type: 'document', + source: { + type: 'base64', + media_type: 'application/pdf', + data: att.base64Data, + }, + }); + } else { + // Image → Anthropic image content block + contentBlocks.push({ + type: 'image', + source: { + type: 'base64', + media_type: att.mediaType, + data: att.base64Data, + }, + }); + } } if (msg.content && msg.content !== '[图片]') { contentBlocks.push({ type: 'text', text: msg.content }); diff --git a/packages/services/agent-service/src/infrastructure/engines/claude-agent-sdk/claude-agent-sdk-engine.ts b/packages/services/agent-service/src/infrastructure/engines/claude-agent-sdk/claude-agent-sdk-engine.ts index 0d4b8c7..6c6b060 100644 --- a/packages/services/agent-service/src/infrastructure/engines/claude-agent-sdk/claude-agent-sdk-engine.ts +++ b/packages/services/agent-service/src/infrastructure/engines/claude-agent-sdk/claude-agent-sdk-engine.ts @@ -470,19 +470,21 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort { return params.prompt; } - // Check if the last message in history is a user message with image content blocks + // Check if the last message in history is a user message with multimodal content blocks const lastMsg = history[history.length - 1]; - const hasImageBlocks = + const hasMultimodalBlocks = lastMsg.role === 'user' && Array.isArray(lastMsg.content) && - lastMsg.content.some((block: any) => block.type === 'image'); + lastMsg.content.some( + (block: any) => block.type === 'image' || block.type === 'document', + ); - if (!hasImageBlocks) { + if (!hasMultimodalBlocks) { return params.prompt; } - // Build a single SDKUserMessage with multimodal content - this.logger.log(`Building multimodal SDK prompt with image content blocks`); + // Build a single SDKUserMessage with multimodal content (images + documents) + this.logger.log(`Building multimodal SDK prompt with content blocks`); const userMessage = { type: 'user' as const,