fix: send PDF attachments as document blocks instead of image blocks
PDF files were incorrectly wrapped as type:'image' content blocks, causing the Claude API to reject them with "Invalid image data".
- conversation-context.service: check mediaType for application/pdf and use a type:'document' block (Anthropic native PDF support) instead of an image block
- claude-agent-sdk-engine: detect both 'image' and 'document' blocks when deciding whether to build a multimodal SDK prompt
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3025910095
commit
c9367ee22a
|
|
@ -87,17 +87,30 @@ export class ConversationContextService {
|
||||||
for (const msg of messages) {
|
for (const msg of messages) {
|
||||||
if (msg.role === 'user') {
|
if (msg.role === 'user') {
|
||||||
if (msg.attachments && msg.attachments.length > 0) {
|
if (msg.attachments && msg.attachments.length > 0) {
|
||||||
// Build multimodal content blocks for messages with images
|
// Build multimodal content blocks for messages with attachments
|
||||||
const contentBlocks: any[] = [];
|
const contentBlocks: any[] = [];
|
||||||
for (const att of msg.attachments) {
|
for (const att of msg.attachments) {
|
||||||
contentBlocks.push({
|
if (att.mediaType === 'application/pdf') {
|
||||||
type: 'image',
|
// PDF → Anthropic document content block
|
||||||
source: {
|
contentBlocks.push({
|
||||||
type: 'base64',
|
type: 'document',
|
||||||
media_type: att.mediaType,
|
source: {
|
||||||
data: att.base64Data,
|
type: 'base64',
|
||||||
},
|
media_type: 'application/pdf',
|
||||||
});
|
data: att.base64Data,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// Image → Anthropic image content block
|
||||||
|
contentBlocks.push({
|
||||||
|
type: 'image',
|
||||||
|
source: {
|
||||||
|
type: 'base64',
|
||||||
|
media_type: att.mediaType,
|
||||||
|
data: att.base64Data,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (msg.content && msg.content !== '[图片]') {
|
if (msg.content && msg.content !== '[图片]') {
|
||||||
contentBlocks.push({ type: 'text', text: msg.content });
|
contentBlocks.push({ type: 'text', text: msg.content });
|
||||||
|
|
|
||||||
|
|
@ -470,19 +470,21 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
|
||||||
return params.prompt;
|
return params.prompt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the last message in history is a user message with image content blocks
|
// Check if the last message in history is a user message with multimodal content blocks
|
||||||
const lastMsg = history[history.length - 1];
|
const lastMsg = history[history.length - 1];
|
||||||
const hasImageBlocks =
|
const hasMultimodalBlocks =
|
||||||
lastMsg.role === 'user' &&
|
lastMsg.role === 'user' &&
|
||||||
Array.isArray(lastMsg.content) &&
|
Array.isArray(lastMsg.content) &&
|
||||||
lastMsg.content.some((block: any) => block.type === 'image');
|
lastMsg.content.some(
|
||||||
|
(block: any) => block.type === 'image' || block.type === 'document',
|
||||||
|
);
|
||||||
|
|
||||||
if (!hasImageBlocks) {
|
if (!hasMultimodalBlocks) {
|
||||||
return params.prompt;
|
return params.prompt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build a single SDKUserMessage with multimodal content
|
// Build a single SDKUserMessage with multimodal content (images + documents)
|
||||||
this.logger.log(`Building multimodal SDK prompt with image content blocks`);
|
this.logger.log(`Building multimodal SDK prompt with content blocks`);
|
||||||
|
|
||||||
const userMessage = {
|
const userMessage = {
|
||||||
type: 'user' as const,
|
type: 'user' as const,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue