fix: send PDF attachments as document blocks instead of image blocks

PDF files were incorrectly wrapped as type:'image' content blocks,
causing the Claude API to reject them with "Invalid image data".

- conversation-context.service: check mediaType for application/pdf and
  emit a type:'document' block (Anthropic native PDF support) instead of
  a type:'image' block
- claude-agent-sdk-engine: detect both 'image' and 'document' blocks
  when deciding whether to build a multimodal SDK prompt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-28 05:27:41 -08:00
parent 3025910095
commit c9367ee22a
2 changed files with 30 additions and 15 deletions

View File

@ -87,9 +87,21 @@ export class ConversationContextService {
for (const msg of messages) { for (const msg of messages) {
if (msg.role === 'user') { if (msg.role === 'user') {
if (msg.attachments && msg.attachments.length > 0) { if (msg.attachments && msg.attachments.length > 0) {
// Build multimodal content blocks for messages with images // Build multimodal content blocks for messages with attachments
const contentBlocks: any[] = []; const contentBlocks: any[] = [];
for (const att of msg.attachments) { for (const att of msg.attachments) {
if (att.mediaType === 'application/pdf') {
// PDF → Anthropic document content block
contentBlocks.push({
type: 'document',
source: {
type: 'base64',
media_type: 'application/pdf',
data: att.base64Data,
},
});
} else {
// Image → Anthropic image content block
contentBlocks.push({ contentBlocks.push({
type: 'image', type: 'image',
source: { source: {
@ -99,6 +111,7 @@ export class ConversationContextService {
}, },
}); });
} }
}
if (msg.content && msg.content !== '[图片]') { if (msg.content && msg.content !== '[图片]') {
contentBlocks.push({ type: 'text', text: msg.content }); contentBlocks.push({ type: 'text', text: msg.content });
} }

View File

@ -470,19 +470,21 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
return params.prompt; return params.prompt;
} }
// Check if the last message in history is a user message with image content blocks // Check if the last message in history is a user message with multimodal content blocks
const lastMsg = history[history.length - 1]; const lastMsg = history[history.length - 1];
const hasImageBlocks = const hasMultimodalBlocks =
lastMsg.role === 'user' && lastMsg.role === 'user' &&
Array.isArray(lastMsg.content) && Array.isArray(lastMsg.content) &&
lastMsg.content.some((block: any) => block.type === 'image'); lastMsg.content.some(
(block: any) => block.type === 'image' || block.type === 'document',
);
if (!hasImageBlocks) { if (!hasMultimodalBlocks) {
return params.prompt; return params.prompt;
} }
// Build a single SDKUserMessage with multimodal content // Build a single SDKUserMessage with multimodal content (images + documents)
this.logger.log(`Building multimodal SDK prompt with image content blocks`); this.logger.log(`Building multimodal SDK prompt with content blocks`);
const userMessage = { const userMessage = {
type: 'user' as const, type: 'user' as const,