fix: PDF attachments sent as document blocks instead of image blocks

PDF files were incorrectly wrapped as type:'image' content blocks,
causing the Claude API to reject them with "Invalid image data".

- conversation-context.service: check mediaType for application/pdf and
  use a type:'document' block (Anthropic native PDF support) instead of type:'image'
- claude-agent-sdk-engine: detect both 'image' and 'document' blocks
  when deciding to build multimodal SDK prompt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-28 05:27:41 -08:00
parent 3025910095
commit c9367ee22a
2 changed files with 30 additions and 15 deletions

View File

@ -87,17 +87,30 @@ export class ConversationContextService {
for (const msg of messages) {
if (msg.role === 'user') {
if (msg.attachments && msg.attachments.length > 0) {
// Build multimodal content blocks for messages with images
// Build multimodal content blocks for messages with attachments
const contentBlocks: any[] = [];
for (const att of msg.attachments) {
contentBlocks.push({
type: 'image',
source: {
type: 'base64',
media_type: att.mediaType,
data: att.base64Data,
},
});
if (att.mediaType === 'application/pdf') {
// PDF → Anthropic document content block
contentBlocks.push({
type: 'document',
source: {
type: 'base64',
media_type: 'application/pdf',
data: att.base64Data,
},
});
} else {
// Image → Anthropic image content block
contentBlocks.push({
type: 'image',
source: {
type: 'base64',
media_type: att.mediaType,
data: att.base64Data,
},
});
}
}
if (msg.content && msg.content !== '[图片]') {
contentBlocks.push({ type: 'text', text: msg.content });

View File

@ -470,19 +470,21 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
return params.prompt;
}
// Check if the last message in history is a user message with image content blocks
// Check if the last message in history is a user message with multimodal content blocks
const lastMsg = history[history.length - 1];
const hasImageBlocks =
const hasMultimodalBlocks =
lastMsg.role === 'user' &&
Array.isArray(lastMsg.content) &&
lastMsg.content.some((block: any) => block.type === 'image');
lastMsg.content.some(
(block: any) => block.type === 'image' || block.type === 'document',
);
if (!hasImageBlocks) {
if (!hasMultimodalBlocks) {
return params.prompt;
}
// Build a single SDKUserMessage with multimodal content
this.logger.log(`Building multimodal SDK prompt with image content blocks`);
// Build a single SDKUserMessage with multimodal content (images + documents)
this.logger.log(`Building multimodal SDK prompt with content blocks`);
const userMessage = {
type: 'user' as const,