feat: add multimodal image input with streaming markdown optimization

Two major features in this commit:

1. Streaming Markdown Rendering Optimization
   - Replace deprecated flutter_markdown with gpt_markdown (active, AI-optimized)
   - Real-time markdown rendering during streaming (was showing raw syntax)
   - Solid block cursor (█) instead of AnimationController blink
   - 80ms token throttle buffer reducing rebuilds from per-token to ~12.5/sec
   - RepaintBoundary isolation for markdown widget repaints
   - StreamTextWidget simplified from StatefulWidget to StatelessWidget

2. Multimodal Image Input (camera + gallery + display)
   - Flutter: image_picker for gallery/camera, base64 encoding, attachment
     preview strip with delete, thumbnails in sent messages
   - Data layer: List<String>? → List<Map<String, dynamic>>? for structured
     attachment payloads through datasource/repository/usecase
   - ChatAttachment model with base64Data, mediaType, fileName
   - ChatMessage entity + ChatMessageModel both support attachments field
   - Backend DTO, Entity (JSONB), Controller, ConversationContextService
     all extended to receive, store, and reconstruct Anthropic image
     content blocks in loadContext()
   - Claude API engine skips duplicate user message when history already
     ends with multimodal content blocks
   - NestJS body parser limit raised to 10MB for base64 image payloads
   - Android CAMERA permission added to manifest
   - Image.memory uses cacheWidth/cacheHeight for memory efficiency
   - Max 5 images per message enforced in UI

Data flow:
  ImagePicker → base64Encode → ChatAttachment → POST body →
  DB (JSONB) → loadContext → Anthropic image content blocks → Claude API

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-28 03:24:17 -08:00
parent 89f0f6134d
commit e4c2505048
20 changed files with 556 additions and 215 deletions

View File

@ -1,6 +1,7 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
<uses-permission android:name="android.permission.INTERNET"/>
<uses-permission android:name="android.permission.RECORD_AUDIO"/>
<uses-permission android:name="android.permission.CAMERA"/>
<uses-permission android:name="android.permission.POST_NOTIFICATIONS"/>
<uses-permission android:name="android.permission.REQUEST_INSTALL_PACKAGES"/>
<application

View File

@ -25,6 +25,11 @@ public final class GeneratedPluginRegistrant {
} catch (Exception e) {
Log.e(TAG, "Error registering plugin flutter_local_notifications, com.dexterous.flutterlocalnotifications.FlutterLocalNotificationsPlugin", e);
}
try {
flutterEngine.getPlugins().add(new io.flutter.plugins.flutter_plugin_android_lifecycle.FlutterAndroidLifecyclePlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin flutter_plugin_android_lifecycle, io.flutter.plugins.flutter_plugin_android_lifecycle.FlutterAndroidLifecyclePlugin", e);
}
try {
flutterEngine.getPlugins().add(new com.it_nomads.fluttersecurestorage.FlutterSecureStoragePlugin());
} catch (Exception e) {
@ -40,6 +45,11 @@ public final class GeneratedPluginRegistrant {
} catch (Exception e) {
Log.e(TAG, "Error registering plugin flutter_tts, com.eyedeadevelopment.fluttertts.FlutterTtsPlugin", e);
}
try {
flutterEngine.getPlugins().add(new io.flutter.plugins.imagepicker.ImagePickerPlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin image_picker_android, io.flutter.plugins.imagepicker.ImagePickerPlugin", e);
}
try {
flutterEngine.getPlugins().add(new dev.fluttercommunity.plus.packageinfo.PackageInfoPlugin());
} catch (Exception e) {

View File

@ -14,7 +14,7 @@ class ChatRemoteDatasource {
Future<Map<String, dynamic>> createTask({
required String sessionId,
required String message,
List<String>? attachments,
List<Map<String, dynamic>>? attachments,
}) async {
final response = await _dio.post(
ApiEndpoints.tasks,

View File

@ -9,6 +9,7 @@ class ChatMessageModel {
final Map<String, dynamic>? toolExecution;
final Map<String, dynamic>? approvalRequest;
final Map<String, dynamic>? metadata;
final List<Map<String, dynamic>>? attachments;
const ChatMessageModel({
required this.id,
@ -19,6 +20,7 @@ class ChatMessageModel {
this.toolExecution,
this.approvalRequest,
this.metadata,
this.attachments,
});
factory ChatMessageModel.fromJson(Map<String, dynamic> json) {
@ -36,6 +38,9 @@ class ChatMessageModel {
approvalRequest: json['approval_request'] as Map<String, dynamic>? ??
json['approvalRequest'] as Map<String, dynamic>?,
metadata: json['metadata'] as Map<String, dynamic>?,
attachments: (json['attachments'] as List<dynamic>?)
?.map((a) => Map<String, dynamic>.from(a as Map))
.toList(),
);
}
@ -49,6 +54,7 @@ class ChatMessageModel {
if (toolExecution != null) 'toolExecution': toolExecution,
if (approvalRequest != null) 'approvalRequest': approvalRequest,
if (metadata != null) 'metadata': metadata,
if (attachments != null) 'attachments': attachments,
};
}
@ -63,6 +69,11 @@ class ChatMessageModel {
toolExecution: toolExecution != null ? _parseToolExecution(toolExecution!) : null,
approvalRequest: approvalRequest != null ? _parseApprovalRequest(approvalRequest!) : null,
metadata: metadata,
attachments: attachments?.map((a) => ChatAttachment(
base64Data: a['base64Data'] as String? ?? '',
mediaType: a['mediaType'] as String? ?? 'image/jpeg',
fileName: a['fileName'] as String?,
)).toList(),
);
}
@ -94,6 +105,7 @@ class ChatMessageModel {
}
: null,
metadata: entity.metadata,
attachments: entity.attachments?.map((a) => a.toJson()).toList(),
);
}

View File

@ -27,7 +27,7 @@ class ChatRepositoryImpl implements ChatRepository {
Stream<StreamEvent> sendMessage({
required String sessionId,
required String message,
List<String>? attachments,
List<Map<String, dynamic>>? attachments,
}) async* {
// Create the task on the backend
final response = await _remoteDatasource.createTask(
@ -87,7 +87,7 @@ class ChatRepositoryImpl implements ChatRepository {
final response = await _remoteDatasource.createTask(
sessionId: sessionId,
message: '[voice_input]',
attachments: [audioPath],
attachments: [{'filePath': audioPath, 'mediaType': 'audio/wav'}],
);
final returnedSessionId = response['sessionId'] as String? ??

View File

@ -4,6 +4,24 @@ enum MessageType { text, toolUse, toolResult, approval, thinking, standingOrderD
enum ToolStatus { executing, completed, error, blocked, awaitingApproval }
/// An image attachment on a chat message, carried as base64-encoded bytes
/// together with its MIME media type and an optional original file name.
class ChatAttachment {
  final String base64Data;
  final String mediaType;
  final String? fileName;

  const ChatAttachment({
    required this.base64Data,
    required this.mediaType,
    this.fileName,
  });

  /// Serializes to the JSON shape sent to the backend
  /// (`fileName` is omitted when absent).
  Map<String, dynamic> toJson() {
    final json = <String, dynamic>{
      'base64Data': base64Data,
      'mediaType': mediaType,
    };
    final name = fileName;
    if (name != null) {
      json['fileName'] = name;
    }
    return json;
  }
}
class ChatMessage {
final String id;
final MessageRole role;
@ -14,6 +32,7 @@ class ChatMessage {
final ApprovalRequest? approvalRequest;
final bool isStreaming;
final Map<String, dynamic>? metadata;
final List<ChatAttachment>? attachments;
const ChatMessage({
required this.id,
@ -25,6 +44,7 @@ class ChatMessage {
this.approvalRequest,
this.isStreaming = false,
this.metadata,
this.attachments,
});
ChatMessage copyWith({
@ -37,6 +57,7 @@ class ChatMessage {
ApprovalRequest? approvalRequest,
bool? isStreaming,
Map<String, dynamic>? metadata,
List<ChatAttachment>? attachments,
}) {
return ChatMessage(
id: id ?? this.id,
@ -48,6 +69,7 @@ class ChatMessage {
approvalRequest: approvalRequest ?? this.approvalRequest,
isStreaming: isStreaming ?? this.isStreaming,
metadata: metadata ?? this.metadata,
attachments: attachments ?? this.attachments,
);
}
}

View File

@ -6,7 +6,7 @@ abstract class ChatRepository {
Stream<StreamEvent> sendMessage({
required String sessionId,
required String message,
List<String>? attachments,
List<Map<String, dynamic>>? attachments,
});
/// Sends a voice message (audio file path) and returns a stream of events.

View File

@ -9,7 +9,7 @@ class SendMessage {
Stream<StreamEvent> execute({
required String sessionId,
required String message,
List<String>? attachments,
List<Map<String, dynamic>>? attachments,
}) {
return _repository.sendMessage(
sessionId: sessionId,

View File

@ -1,5 +1,7 @@
import 'dart:convert';
import 'package:flutter/material.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import 'package:image_picker/image_picker.dart';
import '../../../../core/theme/app_colors.dart';
import '../../domain/entities/chat_message.dart';
import '../providers/chat_providers.dart';
@ -23,14 +25,21 @@ class ChatPage extends ConsumerStatefulWidget {
class _ChatPageState extends ConsumerState<ChatPage> {
final _messageController = TextEditingController();
final _scrollController = ScrollController();
final List<ChatAttachment> _pendingAttachments = [];
// -- Send ------------------------------------------------------------------
void _send() {
final text = _messageController.text.trim();
if (text.isEmpty) return;
if (text.isEmpty && _pendingAttachments.isEmpty) return;
_messageController.clear();
ref.read(chatProvider.notifier).sendMessage(text);
final attachments = _pendingAttachments.isNotEmpty
? List<ChatAttachment>.from(_pendingAttachments)
: null;
if (_pendingAttachments.isNotEmpty) {
setState(() => _pendingAttachments.clear());
}
ref.read(chatProvider.notifier).sendMessage(text, attachments: attachments);
_scrollToBottom();
}
@ -60,6 +69,109 @@ class _ChatPageState extends ConsumerState<ChatPage> {
);
}
// -- Attachments -----------------------------------------------------------
/// Presents a bottom sheet letting the user attach an image either from
/// the photo library or by taking a new photo with the camera.
void _showAttachmentOptions() {
  showModalBottomSheet(
    context: context,
    builder: (ctx) {
      // Local helper: one tappable row per image source.
      ListTile option(IconData icon, String label, ImageSource source) {
        return ListTile(
          leading: Icon(icon),
          title: Text(label),
          onTap: () {
            Navigator.pop(ctx);
            _pickImage(source);
          },
        );
      }

      return SafeArea(
        child: Column(
          mainAxisSize: MainAxisSize.min,
          children: [
            option(Icons.photo_library, '从相册选择', ImageSource.gallery),
            option(Icons.camera_alt, '拍照', ImageSource.camera),
          ],
        ),
      );
    },
  );
}
// Hard cap on images per message; keeps the base64 request payload bounded
// (enforced in _pickImage before opening the picker).
static const _maxAttachments = 5;
/// Picks a single image from [source], downscales/compresses it, and adds
/// it to the pending-attachment strip as base64 data.
///
/// Shows a snackbar and bails out when the per-message cap is reached;
/// silently returns if the user cancels the picker.
Future<void> _pickImage(ImageSource source) async {
  if (_pendingAttachments.length >= _maxAttachments) {
    if (mounted) {
      ScaffoldMessenger.of(context).showSnackBar(
        const SnackBar(content: Text('最多添加 $_maxAttachments 张图片')),
      );
    }
    return;
  }
  final picker = ImagePicker();
  // 1568px max dimension and 85% quality keep the base64 payload small
  // enough for the backend's request-body limit.
  final picked = await picker.pickImage(
    source: source,
    maxWidth: 1568,
    maxHeight: 1568,
    imageQuality: 85,
  );
  if (picked == null) return;
  final bytes = await picked.readAsBytes();
  // FIX: the picker and the file read are async gaps — the page may have
  // been disposed (user navigated away) before they complete; calling
  // setState on a disposed State throws.
  if (!mounted) return;
  final ext = picked.path.split('.').last.toLowerCase();
  final mediaType = switch (ext) {
    'png' => 'image/png',
    'webp' => 'image/webp',
    'gif' => 'image/gif',
    _ => 'image/jpeg',
  };
  setState(() {
    _pendingAttachments.add(ChatAttachment(
      base64Data: base64Encode(bytes),
      mediaType: mediaType,
      fileName: picked.name,
    ));
  });
}
/// Horizontal strip previewing the images queued for the next message,
/// each thumbnail carrying a tap-to-remove badge in its top-right corner.
Widget _buildAttachmentPreview() {
  // Local helper: one 72x72 thumbnail with its delete badge.
  Widget thumbnail(int index) {
    final attachment = _pendingAttachments[index];
    final imageBytes = base64Decode(attachment.base64Data);
    return Stack(
      children: [
        Padding(
          padding: const EdgeInsets.all(4),
          child: ClipRRect(
            borderRadius: BorderRadius.circular(8),
            child: Image.memory(
              imageBytes,
              width: 72,
              height: 72,
              fit: BoxFit.cover,
              // Decode at 2x display size: crisp on high-DPI screens
              // without caching the full-resolution bitmap.
              cacheWidth: 144,
              cacheHeight: 144,
            ),
          ),
        ),
        Positioned(
          top: 0,
          right: 0,
          child: GestureDetector(
            onTap: () => setState(() => _pendingAttachments.removeAt(index)),
            child: Container(
              decoration: const BoxDecoration(
                color: Colors.black54,
                shape: BoxShape.circle,
              ),
              child: const Icon(Icons.close, size: 16, color: Colors.white),
            ),
          ),
        ),
      ],
    );
  }

  return SizedBox(
    height: 80,
    child: ListView.builder(
      scrollDirection: Axis.horizontal,
      itemCount: _pendingAttachments.length,
      itemBuilder: (ctx, i) => thumbnail(i),
    ),
  );
}
/// Whether to show a virtual "working" node at the bottom of the timeline.
/// True when the agent is streaming but no assistant message has appeared yet.
bool _needsWorkingNode(ChatState chatState) {
@ -88,6 +200,19 @@ class _ChatPageState extends ConsumerState<ChatPage> {
isFirst: isFirst,
isLast: isLast,
icon: Icons.person_outline,
content: message.attachments != null && message.attachments!.isNotEmpty
? Wrap(
spacing: 4,
runSpacing: 4,
children: message.attachments!.map((att) {
final bytes = base64Decode(att.base64Data);
return ClipRRect(
borderRadius: BorderRadius.circular(8),
child: Image.memory(bytes, width: 120, height: 120, fit: BoxFit.cover, cacheWidth: 240, cacheHeight: 240),
);
}).toList(),
)
: null,
);
}
@ -347,46 +472,57 @@ class _ChatPageState extends ConsumerState<ChatPage> {
color: AppColors.surface,
border: Border(top: BorderSide(color: AppColors.surfaceLight.withOpacity(0.5))),
),
child: Row(
child: Column(
mainAxisSize: MainAxisSize.min,
children: [
Expanded(
child: TextField(
controller: _messageController,
decoration: InputDecoration(
hintText: isStreaming ? '追加指令...' : '输入指令...',
border: const OutlineInputBorder(
borderRadius: BorderRadius.all(Radius.circular(24)),
if (_pendingAttachments.isNotEmpty) _buildAttachmentPreview(),
Row(
children: [
if (!isStreaming)
IconButton(
icon: const Icon(Icons.add_circle_outline),
tooltip: '添加图片',
onPressed: isAwaitingApproval ? null : _showAttachmentOptions,
),
Expanded(
child: TextField(
controller: _messageController,
decoration: InputDecoration(
hintText: isStreaming ? '追加指令...' : '输入指令...',
border: const OutlineInputBorder(
borderRadius: BorderRadius.all(Radius.circular(24)),
),
contentPadding: const EdgeInsets.symmetric(horizontal: 16, vertical: 10),
),
textInputAction: TextInputAction.send,
onSubmitted: (_) => isStreaming ? _inject() : _send(),
enabled: !isAwaitingApproval,
),
contentPadding: const EdgeInsets.symmetric(horizontal: 16, vertical: 10),
),
textInputAction: TextInputAction.send,
onSubmitted: (_) => isStreaming ? _inject() : _send(),
enabled: !isAwaitingApproval,
),
const SizedBox(width: 8),
if (isStreaming)
Row(
mainAxisSize: MainAxisSize.min,
children: [
IconButton(
icon: const Icon(Icons.send, color: AppColors.info),
tooltip: '追加指令',
onPressed: _inject,
),
IconButton(
icon: const Icon(Icons.stop_circle_outlined, color: AppColors.error),
tooltip: '停止',
onPressed: () => ref.read(chatProvider.notifier).cancelCurrentTask(),
),
],
)
else
IconButton(
icon: const Icon(Icons.send),
onPressed: isAwaitingApproval ? null : _send,
),
],
),
const SizedBox(width: 8),
if (isStreaming)
// During streaming: show both inject-send and stop buttons
Row(
mainAxisSize: MainAxisSize.min,
children: [
IconButton(
icon: const Icon(Icons.send, color: AppColors.info),
tooltip: '追加指令',
onPressed: _inject,
),
IconButton(
icon: const Icon(Icons.stop_circle_outlined, color: AppColors.error),
tooltip: '停止',
onPressed: () => ref.read(chatProvider.notifier).cancelCurrentTask(),
),
],
)
else
IconButton(
icon: const Icon(Icons.send),
onPressed: isAwaitingApproval ? null : _send,
),
],
),
);

View File

@ -125,19 +125,26 @@ class ChatNotifier extends StateNotifier<ChatState> {
final Ref _ref;
StreamSubscription<StreamEvent>? _eventSubscription;
// Token throttle: buffer text tokens and flush every 80ms to reduce rebuilds
final StringBuffer _textBuffer = StringBuffer();
final StringBuffer _thinkingBuffer = StringBuffer();
Timer? _flushTimer;
static const _flushInterval = Duration(milliseconds: 80);
ChatNotifier(this._ref) : super(const ChatState());
/// Sends a user message to the agent and processes the streamed response.
Future<void> sendMessage(String prompt) async {
if (prompt.trim().isEmpty) return;
Future<void> sendMessage(String prompt, {List<ChatAttachment>? attachments}) async {
if (prompt.trim().isEmpty && (attachments == null || attachments.isEmpty)) return;
// Add the user message locally
final userMsg = ChatMessage(
id: DateTime.now().microsecondsSinceEpoch.toString(),
role: MessageRole.user,
content: prompt,
content: prompt.isEmpty ? '[图片]' : prompt,
timestamp: DateTime.now(),
type: MessageType.text,
attachments: attachments,
);
state = state.copyWith(
@ -153,7 +160,8 @@ class ChatNotifier extends StateNotifier<ChatState> {
final stream = useCase.execute(
sessionId: sessionId ?? 'new',
message: prompt,
message: prompt.isEmpty ? '[图片]' : prompt,
attachments: attachments?.map((a) => a.toJson()).toList(),
);
_eventSubscription?.cancel();
@ -183,11 +191,15 @@ class ChatNotifier extends StateNotifier<ChatState> {
switch (event) {
case ThinkingEvent(:final content):
_appendOrUpdateAssistantMessage(content, MessageType.thinking);
state = state.copyWith(agentStatus: AgentStatus.thinking);
if (state.agentStatus != AgentStatus.thinking) {
state = state.copyWith(agentStatus: AgentStatus.thinking);
}
case TextEvent(:final content):
_appendOrUpdateAssistantMessage(content, MessageType.text);
state = state.copyWith(agentStatus: AgentStatus.executing);
if (state.agentStatus != AgentStatus.executing) {
state = state.copyWith(agentStatus: AgentStatus.executing);
}
case ToolUseEvent(:final toolName, :final input):
final msg = ChatMessage(
@ -259,11 +271,16 @@ class ChatNotifier extends StateNotifier<ChatState> {
);
case CompletedEvent(:final summary):
_flushBuffersSync();
final hasAssistantText = state.messages.any(
(m) => m.role == MessageRole.assistant && m.type == MessageType.text && m.content.isNotEmpty,
);
if (summary.isNotEmpty && !hasAssistantText) {
_appendOrUpdateAssistantMessage(summary, MessageType.text);
// Write summary directly to state (not via buffer) since we're about
// to set idle status — buffering would cause a brief missing-text gap.
state = state.copyWith(
messages: _applyBuffer(state.messages, summary, MessageType.text),
);
}
// Mark any remaining executing tools as completed
final finalMessages = state.messages.map((m) {
@ -284,6 +301,7 @@ class ChatNotifier extends StateNotifier<ChatState> {
);
case ErrorEvent(:final message):
_flushBuffersSync();
state = state.copyWith(
agentStatus: AgentStatus.error,
error: message,
@ -334,40 +352,83 @@ class ChatNotifier extends StateNotifier<ChatState> {
}
void _appendOrUpdateAssistantMessage(String content, MessageType type) {
if (state.messages.isNotEmpty) {
final last = state.messages.last;
// Buffer tokens and flush on a timer to reduce widget rebuilds
if (type == MessageType.thinking) {
_thinkingBuffer.write(content);
} else {
_textBuffer.write(content);
}
_flushTimer ??= Timer(_flushInterval, _flushBuffers);
}
/// Flush buffered tokens to state immediately (synchronous).
///
/// Used at stream boundaries (completion, error, cancel, inject) where the
/// throttle timer would otherwise delay or drop the final tokens.
void _flushBuffersSync() {
  // Cancel any pending throttle tick first so it cannot double-flush later.
  _flushTimer?.cancel();
  _flushTimer = null;
  _flushBuffers();
}
/// Flush any buffered text/thinking tokens into state.messages.
///
/// Runs from the throttle timer during streaming and synchronously from
/// [_flushBuffersSync] at stream boundaries. No-op when both buffers are
/// empty, so state (and the widget tree) is only touched when needed.
void _flushBuffers() {
  _flushTimer = null;
  final hasText = _textBuffer.isNotEmpty;
  final hasThinking = _thinkingBuffer.isNotEmpty;
  if (!hasText && !hasThinking) return;
  var updated = [...state.messages];
  if (hasText) {
    updated = _applyBuffer(updated, _textBuffer.toString(), MessageType.text);
    _textBuffer.clear();
  }
  if (hasThinking) {
    updated = _applyBuffer(updated, _thinkingBuffer.toString(), MessageType.thinking);
    _thinkingBuffer.clear();
  }
  state = state.copyWith(messages: updated);
}
List<ChatMessage> _applyBuffer(
List<ChatMessage> messages,
String content,
MessageType type,
) {
if (messages.isNotEmpty) {
final last = messages.last;
if (last.role == MessageRole.assistant && last.type == type) {
final updated = last.copyWith(content: last.content + content);
state = state.copyWith(
messages: [
...state.messages.sublist(0, state.messages.length - 1),
updated,
],
);
return;
return [
...messages.sublist(0, messages.length - 1),
last.copyWith(content: last.content + content),
];
}
}
final msg = ChatMessage(
id: DateTime.now().microsecondsSinceEpoch.toString(),
role: MessageRole.assistant,
content: content,
timestamp: DateTime.now(),
type: type,
isStreaming: true,
);
state = state.copyWith(messages: [...state.messages, msg]);
return [
...messages,
ChatMessage(
id: DateTime.now().microsecondsSinceEpoch.toString(),
role: MessageRole.assistant,
content: content,
timestamp: DateTime.now(),
type: type,
isStreaming: true,
),
];
}
/// Starts a new chat: clears messages and resets the session ID, so the
/// next message creates a fresh session on the backend.
void startNewChat() {
  _eventSubscription?.cancel();
  // Drop any throttled tokens still in flight for the old conversation.
  _flushTimer?.cancel();
  _flushTimer = null;
  _textBuffer.clear();
  _thinkingBuffer.clear();
  state = const ChatState();
}
/// Switches to an existing session and loads its messages from the backend.
Future<void> switchSession(String sessionId) async {
_eventSubscription?.cancel();
_flushTimer?.cancel();
_flushTimer = null;
_textBuffer.clear();
_thinkingBuffer.clear();
state = ChatState(sessionId: sessionId, agentStatus: AgentStatus.idle);
try {
@ -431,6 +492,9 @@ class ChatNotifier extends StateNotifier<ChatState> {
final taskId = state.taskId;
if (taskId == null && state.sessionId == null) return;
// Flush any buffered tokens before cancelling
_flushBuffersSync();
// 1. IMMEDIATELY update UI optimistic cancel
_eventSubscription?.cancel();
_eventSubscription = null;
@ -483,9 +547,10 @@ class ChatNotifier extends StateNotifier<ChatState> {
return sendMessage(message);
}
// 1. Cancel current event subscription
// 1. Cancel current event subscription and flush buffered tokens
_eventSubscription?.cancel();
_eventSubscription = null;
_flushBuffersSync();
// 2. Mark executing tools as completed
final updatedMessages = state.messages.map((m) {
@ -563,12 +628,17 @@ class ChatNotifier extends StateNotifier<ChatState> {
// Clears the conversation: cancels the event stream, discards any
// throttled tokens still buffered, and resets to the initial state.
void clearChat() {
  _eventSubscription?.cancel();
  _flushTimer?.cancel();
  _flushTimer = null;
  _textBuffer.clear();
  _thinkingBuffer.clear();
  state = const ChatState();
}
@override
void dispose() {
  // Stop listening to the task event stream and the token-flush timer
  // before tearing down, so no callback fires on a disposed notifier.
  _eventSubscription?.cancel();
  _flushTimer?.cancel();
  _ref.read(webSocketClientProvider).disconnect();
  super.dispose();
}

View File

@ -1,5 +1,5 @@
import 'package:flutter/material.dart';
import 'package:flutter_markdown/flutter_markdown.dart';
import 'package:gpt_markdown/gpt_markdown.dart';
import '../../../../core/theme/app_colors.dart';
import '../../domain/entities/chat_message.dart';
@ -58,10 +58,11 @@ class MessageBubble extends StatelessWidget {
),
)
else
MarkdownBody(
data: message.content,
selectable: true,
styleSheet: _markdownStyleSheet(context),
SelectionArea(
child: GptMarkdown(
message.content,
style: const TextStyle(color: AppColors.textPrimary, fontSize: 15),
),
),
// Timestamp
@ -83,37 +84,6 @@ class MessageBubble extends StatelessWidget {
);
}
MarkdownStyleSheet _markdownStyleSheet(BuildContext context) {
return MarkdownStyleSheet(
p: const TextStyle(color: AppColors.textPrimary, fontSize: 15),
h1: const TextStyle(color: AppColors.textPrimary, fontSize: 22, fontWeight: FontWeight.bold),
h2: const TextStyle(color: AppColors.textPrimary, fontSize: 19, fontWeight: FontWeight.bold),
h3: const TextStyle(color: AppColors.textPrimary, fontSize: 17, fontWeight: FontWeight.w600),
strong: const TextStyle(color: AppColors.textPrimary, fontWeight: FontWeight.bold),
em: const TextStyle(color: AppColors.textSecondary, fontStyle: FontStyle.italic),
code: TextStyle(
color: AppColors.secondary,
backgroundColor: AppColors.background.withOpacity(0.5),
fontSize: 13,
fontFamily: 'monospace',
),
codeblockDecoration: BoxDecoration(
color: AppColors.background.withOpacity(0.6),
borderRadius: BorderRadius.circular(8),
),
codeblockPadding: const EdgeInsets.all(10),
blockquoteDecoration: BoxDecoration(
border: Border(left: BorderSide(color: AppColors.primary, width: 3)),
),
blockquotePadding: const EdgeInsets.only(left: 12, top: 4, bottom: 4),
tableBorder: TableBorder.all(color: AppColors.surfaceLight, width: 0.5),
tableHead: const TextStyle(color: AppColors.textPrimary, fontWeight: FontWeight.bold, fontSize: 13),
tableBody: const TextStyle(color: AppColors.textSecondary, fontSize: 13),
tableCellsPadding: const EdgeInsets.symmetric(horizontal: 8, vertical: 4),
listBullet: const TextStyle(color: AppColors.textSecondary, fontSize: 15),
);
}
String _formatTime(DateTime time) {
final hour = time.hour.toString().padLeft(2, '0');
final minute = time.minute.toString().padLeft(2, '0');

View File

@ -1,11 +1,12 @@
import 'package:flutter/material.dart';
import 'package:flutter_markdown/flutter_markdown.dart';
import 'package:gpt_markdown/gpt_markdown.dart';
import '../../../../core/theme/app_colors.dart';
/// Widget that renders streaming text with an animated cursor at the end,
/// giving the appearance of real-time text generation.
/// When streaming completes, renders Markdown.
class StreamTextWidget extends StatefulWidget {
/// Widget that renders streaming text as real-time Markdown.
///
/// During streaming: renders Markdown with a solid block cursor at the end.
/// When streaming completes: renders Markdown with text selection enabled.
class StreamTextWidget extends StatelessWidget {
final String text;
final bool isStreaming;
final TextStyle? style;
@ -17,89 +18,23 @@ class StreamTextWidget extends StatefulWidget {
this.style,
});
@override
State<StreamTextWidget> createState() => _StreamTextWidgetState();
}
class _StreamTextWidgetState extends State<StreamTextWidget>
with SingleTickerProviderStateMixin {
late AnimationController _cursorController;
@override
void initState() {
super.initState();
_cursorController = AnimationController(
vsync: this,
duration: const Duration(milliseconds: 600),
)..repeat(reverse: true);
}
@override
void dispose() {
_cursorController.dispose();
super.dispose();
}
@override
Widget build(BuildContext context) {
final defaultStyle = TextStyle(
color: AppColors.textPrimary,
fontSize: 15,
);
final effectiveStyle = widget.style ?? defaultStyle;
// Streaming complete render full Markdown
if (!widget.isStreaming) {
return MarkdownBody(
data: widget.text,
selectable: true,
styleSheet: MarkdownStyleSheet(
p: effectiveStyle,
h1: effectiveStyle.copyWith(fontSize: 22, fontWeight: FontWeight.bold),
h2: effectiveStyle.copyWith(fontSize: 19, fontWeight: FontWeight.bold),
h3: effectiveStyle.copyWith(fontSize: 17, fontWeight: FontWeight.w600),
strong: effectiveStyle.copyWith(fontWeight: FontWeight.bold),
em: effectiveStyle.copyWith(fontStyle: FontStyle.italic, color: AppColors.textSecondary),
code: TextStyle(
color: AppColors.secondary,
backgroundColor: AppColors.background.withOpacity(0.5),
fontSize: 13,
fontFamily: 'monospace',
),
codeblockDecoration: BoxDecoration(
color: AppColors.background.withOpacity(0.6),
borderRadius: BorderRadius.circular(8),
),
codeblockPadding: const EdgeInsets.all(10),
tableBorder: TableBorder.all(color: AppColors.surfaceLight, width: 0.5),
tableHead: effectiveStyle.copyWith(fontWeight: FontWeight.bold, fontSize: 13),
tableBody: TextStyle(color: AppColors.textSecondary, fontSize: 13),
tableCellsPadding: const EdgeInsets.symmetric(horizontal: 8, vertical: 4),
),
);
}
// Still streaming show plain text with blinking cursor
return AnimatedBuilder(
animation: _cursorController,
builder: (context, _) {
return RichText(
text: TextSpan(
style: effectiveStyle,
children: [
TextSpan(text: widget.text),
TextSpan(
text: '\u2588', // Block cursor character
style: effectiveStyle.copyWith(
color: effectiveStyle.color?.withOpacity(
_cursorController.value,
),
),
),
],
),
final effectiveStyle = style ??
const TextStyle(
color: AppColors.textPrimary,
fontSize: 15,
);
},
// Streaming: append solid block cursor; completed: plain markdown
final displayText = isStreaming ? '$text\u2588' : text;
return RepaintBoundary(
child: isStreaming
? GptMarkdown(displayText, style: effectiveStyle)
: SelectionArea(
child: GptMarkdown(displayText, style: effectiveStyle),
),
);
}
}

View File

@ -185,6 +185,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "3.1.2"
cross_file:
dependency: transitive
description:
name: cross_file
sha256: "28bb3ae56f117b5aec029d702a90f57d285cd975c3c5c281eaca38dbc47c5937"
url: "https://pub.dev"
source: hosted
version: "0.3.5+2"
crypto:
dependency: "direct main"
description:
@ -281,6 +289,38 @@ packages:
url: "https://pub.dev"
source: hosted
version: "7.0.1"
file_selector_linux:
dependency: transitive
description:
name: file_selector_linux
sha256: "2567f398e06ac72dcf2e98a0c95df2a9edd03c2c2e0cacd4780f20cdf56263a0"
url: "https://pub.dev"
source: hosted
version: "0.9.4"
file_selector_macos:
dependency: transitive
description:
name: file_selector_macos
sha256: "5e0bbe9c312416f1787a68259ea1505b52f258c587f12920422671807c4d618a"
url: "https://pub.dev"
source: hosted
version: "0.9.5"
file_selector_platform_interface:
dependency: transitive
description:
name: file_selector_platform_interface
sha256: "35e0bd61ebcdb91a3505813b055b09b79dfdc7d0aee9c09a7ba59ae4bb13dc85"
url: "https://pub.dev"
source: hosted
version: "2.7.0"
file_selector_windows:
dependency: transitive
description:
name: file_selector_windows
sha256: "62197474ae75893a62df75939c777763d39c2bc5f73ce5b88497208bc269abfd"
url: "https://pub.dev"
source: hosted
version: "0.9.3+5"
fixnum:
dependency: transitive
description:
@ -342,14 +382,22 @@ packages:
url: "https://pub.dev"
source: hosted
version: "8.0.0"
flutter_markdown:
dependency: "direct main"
flutter_math_fork:
dependency: transitive
description:
name: flutter_markdown
sha256: "08fb8315236099ff8e90cb87bb2b935e0a724a3af1623000a9cec930468e0f27"
name: flutter_math_fork
sha256: "6d5f2f1aa57ae539ffb0a04bb39d2da67af74601d685a161aff7ce5bda5fa407"
url: "https://pub.dev"
source: hosted
version: "0.7.7+1"
version: "0.7.4"
flutter_plugin_android_lifecycle:
dependency: transitive
description:
name: flutter_plugin_android_lifecycle
sha256: ee8068e0e1cd16c4a82714119918efdeed33b3ba7772c54b5d094ab53f9b7fd1
url: "https://pub.dev"
source: hosted
version: "2.0.33"
flutter_riverpod:
dependency: "direct main"
description:
@ -496,6 +544,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "14.8.1"
gpt_markdown:
dependency: "direct main"
description:
name: gpt_markdown
sha256: "9b88dfaffea644070b648c204ca4a55745a49f4ad0b58ed0ab70913ad593c7a1"
url: "https://pub.dev"
source: hosted
version: "1.1.5"
graphs:
dependency: transitive
description:
@ -552,6 +608,70 @@ packages:
url: "https://pub.dev"
source: hosted
version: "4.8.0"
image_picker:
dependency: "direct main"
description:
name: image_picker
sha256: "784210112be18ea55f69d7076e2c656a4e24949fa9e76429fe53af0c0f4fa320"
url: "https://pub.dev"
source: hosted
version: "1.2.1"
image_picker_android:
dependency: transitive
description:
name: image_picker_android
sha256: eda9b91b7e266d9041084a42d605a74937d996b87083395c5e47835916a86156
url: "https://pub.dev"
source: hosted
version: "0.8.13+14"
image_picker_for_web:
dependency: transitive
description:
name: image_picker_for_web
sha256: "66257a3191ab360d23a55c8241c91a6e329d31e94efa7be9cf7a212e65850214"
url: "https://pub.dev"
source: hosted
version: "3.1.1"
image_picker_ios:
dependency: transitive
description:
name: image_picker_ios
sha256: b9c4a438a9ff4f60808c9cf0039b93a42bb6c2211ef6ebb647394b2b3fa84588
url: "https://pub.dev"
source: hosted
version: "0.8.13+6"
image_picker_linux:
dependency: transitive
description:
name: image_picker_linux
sha256: "1f81c5f2046b9ab724f85523e4af65be1d47b038160a8c8deed909762c308ed4"
url: "https://pub.dev"
source: hosted
version: "0.2.2"
image_picker_macos:
dependency: transitive
description:
name: image_picker_macos
sha256: "86f0f15a309de7e1a552c12df9ce5b59fe927e71385329355aec4776c6a8ec91"
url: "https://pub.dev"
source: hosted
version: "0.2.2+1"
image_picker_platform_interface:
dependency: transitive
description:
name: image_picker_platform_interface
sha256: "567e056716333a1647c64bb6bd873cff7622233a5c3f694be28a583d4715690c"
url: "https://pub.dev"
source: hosted
version: "2.11.1"
image_picker_windows:
dependency: transitive
description:
name: image_picker_windows
sha256: d248c86554a72b5495a31c56f060cf73a41c7ff541689327b1a7dbccc33adfae
url: "https://pub.dev"
source: hosted
version: "0.2.2"
intl:
dependency: "direct main"
description:
@ -640,14 +760,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.3.0"
markdown:
dependency: transitive
description:
name: markdown
sha256: "935e23e1ff3bc02d390bad4d4be001208ee92cc217cb5b5a6c19bc14aaa318c1"
url: "https://pub.dev"
source: hosted
version: "7.3.0"
matcher:
dependency: transitive
description:
@ -688,6 +800,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "5.4.6"
nested:
dependency: transitive
description:
name: nested
sha256: "03bac4c528c64c95c722ec99280375a6f2fc708eec17c7b3f07253b626cd2a20"
url: "https://pub.dev"
source: hosted
version: "1.0.0"
package_config:
dependency: transitive
description:
@ -864,6 +984,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "6.5.0"
provider:
dependency: transitive
description:
name: provider
sha256: "4e82183fa20e5ca25703ead7e05de9e4cceed1fbd1eadc1ac3cb6f565a09f272"
url: "https://pub.dev"
source: hosted
version: "6.1.5+1"
pub_semver:
dependency: transitive
description:
@ -1237,6 +1365,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.0.2"
tuple:
dependency: transitive
description:
name: tuple
sha256: a97ce2013f240b2f3807bcbaf218765b6f301c3eff91092bcfa23a039e7dd151
url: "https://pub.dev"
source: hosted
version: "2.0.2"
typed_data:
dependency: transitive
description:

View File

@ -34,8 +34,9 @@ dependencies:
# UI
fl_chart: ^0.67.0
flutter_markdown: ^0.7.0
gpt_markdown: ^1.1.5
flutter_svg: ^2.0.10+1
image_picker: ^1.1.2
# Voice
record: ^6.0.0

View File

@ -5,4 +5,9 @@ export class ExecuteTaskDto {
maxTurns?: number;
maxBudgetUsd?: number;
skill?: { name: string; arguments: string };
attachments?: Array<{
base64Data: string;
mediaType: string;
fileName?: string;
}>;
}

View File

@ -23,6 +23,13 @@ export class ConversationMessage {
@Column({ type: 'jsonb', nullable: true })
toolResults?: any[];
@Column({ type: 'jsonb', nullable: true })
attachments?: Array<{
mediaType: string;
base64Data: string;
fileName?: string;
}>;
@Column({ type: 'int', nullable: true })
tokenCount?: number;

View File

@ -18,7 +18,11 @@ export class ConversationContextService {
/**
* Save a user message to the conversation history.
*/
async saveUserMessage(sessionId: string, content: string): Promise<ConversationMessage> {
async saveUserMessage(
sessionId: string,
content: string,
attachments?: Array<{ base64Data: string; mediaType: string; fileName?: string }>,
): Promise<ConversationMessage> {
const tenantId = TenantContextService.getTenantId();
const sequenceNumber = await this.messageRepository.getNextSequenceNumber(sessionId);
@ -28,6 +32,7 @@ export class ConversationContextService {
message.sessionId = sessionId;
message.role = 'user';
message.content = content;
message.attachments = attachments;
message.tokenCount = this.estimateTokens(content);
message.sequenceNumber = sequenceNumber;
message.createdAt = new Date();
@ -81,7 +86,26 @@ export class ConversationContextService {
for (const msg of messages) {
if (msg.role === 'user') {
history.push({ role: 'user', content: msg.content });
if (msg.attachments && msg.attachments.length > 0) {
// Build multimodal content blocks for messages with images
const contentBlocks: any[] = [];
for (const att of msg.attachments) {
contentBlocks.push({
type: 'image',
source: {
type: 'base64',
media_type: att.mediaType,
data: att.base64Data,
},
});
}
if (msg.content && msg.content !== '[图片]') {
contentBlocks.push({ type: 'text', text: msg.content });
}
history.push({ role: 'user', content: contentBlocks });
} else {
history.push({ role: 'user', content: msg.content });
}
} else if (msg.role === 'assistant') {
// If the assistant message has tool calls, build content blocks
if (msg.toolCalls && msg.toolCalls.length > 0) {

View File

@ -59,10 +59,14 @@ export class ClaudeApiEngine implements AgentEnginePort {
const tools = this.buildToolDefinitions(params.allowedTools);
// Initialize conversation with history + user prompt
const messages: AnthropicMessage[] = [
...(params.conversationHistory || []),
{ role: 'user', content: params.prompt },
];
// When history already ends with the current user message (e.g. multimodal with image blocks),
// don't add a duplicate plain-text user message.
const history = params.conversationHistory || [];
const lastHistoryMsg = history.length > 0 ? history[history.length - 1] : null;
const historyEndsWithUser = lastHistoryMsg?.role === 'user' && Array.isArray(lastHistoryMsg.content);
const messages: AnthropicMessage[] = historyEndsWithUser
? [...history]
: [...history, { role: 'user', content: params.prompt }];
let totalTokensUsed = 0;
let turnCount = 0;

View File

@ -38,6 +38,7 @@ export class AgentController {
allowedTools?: string[];
engineType?: string;
maxContextMessages?: number;
attachments?: Array<{ base64Data: string; mediaType: string; fileName?: string }>;
},
) {
// Allow callers to override the engine (e.g. voice uses claude_api for streaming)
@ -85,15 +86,19 @@ export class AgentController {
await this.taskRepository.save(task);
// Save user message to conversation history
await this.contextService.saveUserMessage(session.id, body.prompt);
await this.contextService.saveUserMessage(session.id, body.prompt, body.attachments);
// Load conversation history for context
const maxCtx = body.maxContextMessages ?? 20;
const conversationHistory = await this.contextService.loadContext(session.id, maxCtx);
this.logger.log(`[Task ${task.id}] Loaded ${conversationHistory.length} history messages for session=${session.id}`);
// Pass conversation history (excluding the current user message, which is the last one)
const historyForEngine = conversationHistory.slice(0, -1);
// When the current message has attachments, keep it in history (it has image content blocks).
// Otherwise, strip it so the engine adds a plain-text user message.
const hasAttachments = body.attachments && body.attachments.length > 0;
const historyForEngine = hasAttachments
? conversationHistory // includes current user message with image blocks
: conversationHistory.slice(0, -1);
// For SDK engine: load previous SDK session ID for native resume
const isSdkEngine = engine.engineType === AgentEngineType.CLAUDE_AGENT_SDK;

View File

@ -2,6 +2,7 @@ import { NestFactory } from '@nestjs/core';
import { Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { WsAdapter } from '@nestjs/platform-ws';
import * as express from 'express';
import { AgentModule } from './agent.module';
const logger = new Logger('AgentService');
@ -16,6 +17,8 @@ process.on('uncaughtException', (error) => {
async function bootstrap() {
const app = await NestFactory.create(AgentModule);
// Increase body parser limit for base64 image attachments
app.use(express.json({ limit: '10mb' }));
// Use raw WebSocket adapter instead of Socket.IO
app.useWebSocketAdapter(new WsAdapter(app));
const config = app.get(ConfigService);