From c9ee93fffd066d178878275ba7b42607c2e9755c Mon Sep 17 00:00:00 2001 From: hailin Date: Mon, 9 Mar 2026 21:18:14 -0700 Subject: [PATCH] feat(instance-chat): full multimodal attachment support via OpenClaw bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After verifying that the OpenClaw gateway's chat.send WebSocket RPC accepts an 'attachments' array (confirmed from openclaw/openclaw source and documentation), implement end-to-end image/file attachment support for instance chat: Bridge (openclaw-client.ts): - chatSendAndWait() now accepts optional `attachments[]` parameter - Passes attachments to chat.send RPC only when non-empty Bridge (index.ts): - /task-async accepts `attachments[]` from request body - Forwards to chatSendAndWait unchanged Backend (agent.controller.ts): - executeInstanceTask() accepts IT0 attachment format { base64Data, mediaType, fileName? } - Converts to OpenClaw format { name, mimeType, media: "data:..." } - Saves attachments to conversation history via contextService - Forwards to bridge via bridgeAttachments spread Flutter (agent_instance_chat_remote_datasource.dart): - createTask() now includes attachments in POST body when present Flutter (chat_page.dart): - Reverted Fix 5 (disabled button) — attachment button fully enabled in instance mode since the bridge now supports it Attachment format (OpenClaw wire): { name: string, mimeType: string, media: "data:<mimeType>;base64,<base64Data>" } Co-Authored-By: Claude Sonnet 4.6 --- .../agent_instance_chat_remote_datasource.dart | 2 +- .../chat/presentation/pages/chat_page.dart | 14 ++++---------- packages/openclaw-bridge/src/index.ts | 7 +++++++ .../openclaw-bridge/src/openclaw-client.ts | 16 +++++++++++----- .../rest/controllers/agent.controller.ts | 18 ++++++++++++++++-- 5 files changed, 39 insertions(+), 18 deletions(-) diff --git a/it0_app/lib/features/agent_instance_chat/data/datasources/agent_instance_chat_remote_datasource.dart 
b/it0_app/lib/features/agent_instance_chat/data/datasources/agent_instance_chat_remote_datasource.dart index e031ce9..cc49cb7 100644 --- a/it0_app/lib/features/agent_instance_chat/data/datasources/agent_instance_chat_remote_datasource.dart +++ b/it0_app/lib/features/agent_instance_chat/data/datasources/agent_instance_chat_remote_datasource.dart @@ -29,7 +29,7 @@ class AgentInstanceChatDatasource implements ChatRemoteDatasource { data: { 'prompt': message, if (sessionId != 'new') 'sessionId': sessionId, - // Note: attachments are not yet supported for instance chat + if (attachments != null && attachments.isNotEmpty) 'attachments': attachments, }, ); return response.data as Map; diff --git a/it0_app/lib/features/chat/presentation/pages/chat_page.dart b/it0_app/lib/features/chat/presentation/pages/chat_page.dart index ec43d28..a21ecf5 100644 --- a/it0_app/lib/features/chat/presentation/pages/chat_page.dart +++ b/it0_app/lib/features/chat/presentation/pages/chat_page.dart @@ -720,16 +720,10 @@ class _ChatPageState extends ConsumerState { if (!isStreaming) Padding( padding: const EdgeInsets.only(left: 4), - child: Tooltip( - message: widget.agentName != null - ? '附件功能暂不支持智能体对话' - : AppLocalizations.of(context).chatAddImageTooltip, - child: IconButton( - icon: const Icon(Icons.add_circle_outline, size: 22), - onPressed: (isAwaitingApproval || widget.agentName != null) - ? null - : _showAttachmentOptions, - ), + child: IconButton( + icon: const Icon(Icons.add_circle_outline, size: 22), + tooltip: AppLocalizations.of(context).chatAddImageTooltip, + onPressed: isAwaitingApproval ? null : _showAttachmentOptions, ), ), Expanded( diff --git a/packages/openclaw-bridge/src/index.ts b/packages/openclaw-bridge/src/index.ts index b3c6ade..79e38b8 100644 --- a/packages/openclaw-bridge/src/index.ts +++ b/packages/openclaw-bridge/src/index.ts @@ -145,6 +145,12 @@ app.post('/task-async', async (req, res) => { const timeoutSeconds: number = req.body.timeoutSeconds ?? 
120; // 2 min default for async tasks const idempotencyKey: string = req.body.idempotencyKey ?? crypto.randomUUID(); const callbackData = req.body.callbackData ?? {}; + // Optional attachments — passed through to OpenClaw chat.send unchanged. + // Expected format: [{ name, mimeType, media }] where media is a data-URI. + const attachments: Array<{ name: string; mimeType: string; media: string }> | undefined = + Array.isArray(req.body.attachments) && req.body.attachments.length > 0 + ? req.body.attachments + : undefined; // Return immediately — LLM runs in background res.json({ ok: true, pending: true }); @@ -162,6 +168,7 @@ app.post('/task-async', async (req, res) => { message: req.body.prompt, idempotencyKey, timeoutMs: timeoutSeconds * 1000, + attachments, }).then((reply: string) => { postCallback({ ok: true, result: reply, callbackData }); }).catch((err: Error) => { diff --git a/packages/openclaw-bridge/src/openclaw-client.ts b/packages/openclaw-bridge/src/openclaw-client.ts index 95c4c8e..fe3f521 100644 --- a/packages/openclaw-bridge/src/openclaw-client.ts +++ b/packages/openclaw-bridge/src/openclaw-client.ts @@ -298,17 +298,23 @@ export class OpenClawClient { message: string; idempotencyKey: string; timeoutMs?: number; + /** Optional media attachments (images, PDFs, etc.) in OpenClaw format. */ + attachments?: Array<{ name: string; mimeType: string; media: string }>; }): Promise { const timeoutMs = params.timeoutMs ?? 
30_000; // Send chat.send — resolves immediately with { runId, status: "started" } + const chatSendParams: Record = { + sessionKey: params.sessionKey, + message: params.message, + idempotencyKey: params.idempotencyKey, + }; + if (params.attachments && params.attachments.length > 0) { + chatSendParams['attachments'] = params.attachments; + } const ack = await this.rpc( 'chat.send', - { - sessionKey: params.sessionKey, - message: params.message, - idempotencyKey: params.idempotencyKey, - }, + chatSendParams, 10_000, // 10s for the initial ack ) as { runId: string; status: string }; diff --git a/packages/services/agent-service/src/interfaces/rest/controllers/agent.controller.ts b/packages/services/agent-service/src/interfaces/rest/controllers/agent.controller.ts index 4cc8a05..2b399d9 100644 --- a/packages/services/agent-service/src/interfaces/rest/controllers/agent.controller.ts +++ b/packages/services/agent-service/src/interfaces/rest/controllers/agent.controller.ts @@ -827,7 +827,11 @@ export class AgentController { @TenantId() tenantId: string, @Req() req: any, @Param('instanceId') instanceId: string, - @Body() body: { prompt: string; sessionId?: string }, + @Body() body: { + prompt: string; + sessionId?: string; + attachments?: Array<{ base64Data: string; mediaType: string; fileName?: string }>; + }, ) { const instance = await this.instanceRepository.findById(instanceId); if (!instance) throw new NotFoundException(`Instance ${instanceId} not found`); @@ -883,7 +887,7 @@ export class AgentController { await this.taskRepository.save(task); // Persist user message for display in conversation history - await this.contextService.saveUserMessage(session.id, body.prompt); + await this.contextService.saveUserMessage(session.id, body.prompt, body.attachments); // The OpenClaw bridge tracks conversation context internally via sessionKey. // We use our DB session ID as the key so each session has isolated context. 
@@ -891,6 +895,15 @@ export class AgentController { const callbackUrl = `${process.env.AGENT_SERVICE_PUBLIC_URL}/api/v1/agent/instances/openclaw-app-callback`; const bridgeUrl = `http://${instance.serverHost}:${instance.hostPort}/task-async`; + // Convert IT0 attachment format → OpenClaw format for the bridge. + // IT0: { base64Data, mediaType, fileName? } + // OpenClaw: { name, mimeType, media: "data:<mimeType>;base64,<base64Data>" } + const bridgeAttachments = body.attachments?.map((att) => ({ + name: att.fileName ?? 'attachment', + mimeType: att.mediaType, + media: `data:${att.mediaType};base64,${att.base64Data}`, + })); + this.logger.log( `[Task ${task.id}] Routing to OpenClaw instance ${instanceId} @ ${bridgeUrl}, session=${session.id}`, ); @@ -946,6 +959,7 @@ export class AgentController { idempotencyKey: task.id, callbackUrl, callbackData: { sessionId: session.id, taskId: task.id }, + ...(bridgeAttachments && bridgeAttachments.length > 0 && { attachments: bridgeAttachments }), }), }) .then(async (res) => {