From 55b983a9505a017991b6773dc624a25d63bd48eb Mon Sep 17 00:00:00 2001
From: hailin
Date: Fri, 6 Mar 2026 03:20:41 -0800
Subject: [PATCH] feat(it0_app): add WhatsApp-style voice message with async
 agent interrupt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New VoiceMicButton widget (press-and-hold to record, release to send):
- Records audio to a temp .m4a file via the `record` package
- Slide-up gesture cancels recording without sending
- Pulsing red mic icon + "松开发送/松开取消" feedback during recording

New flow for voice messages:
1. Temp "🎤 识别中..." bubble shown immediately
2. Audio uploaded to POST /api/v1/agent/sessions/:id/voice-message
   (multipart/form-data; backend runs Whisper STT)
3. Placeholder replaced with real transcript
4. WS stream subscribed via new subscribeExistingTask() to receive agent's
   streaming response — same pipeline as text chat

Voice messages act as async interrupts: if the agent is mid-task the backend
hard-cancels it before processing the new voice command, so whoever presses
the mic button always takes priority.
Files changed:
  chat_remote_datasource.dart — sendVoiceMessage() multipart upload
  chat_repository.dart — subscribeExistingTask() interface method
  chat_repository_impl.dart — implement subscribeExistingTask(); fix sendVoiceMessage() stub
  chat_providers.dart — ChatNotifier.sendVoiceMessage()
  voice_mic_button.dart — NEW press-and-hold recording widget
  chat_page.dart — mic button added to input area

Co-Authored-By: Claude Sonnet 4.6
---
 .../datasources/chat_remote_datasource.dart   |  23 +++
 .../repositories/chat_repository_impl.dart    |  32 ++-
 .../domain/repositories/chat_repository.dart  |   8 +
 .../chat/presentation/pages/chat_page.dart    |  23 ++-
 .../providers/chat_providers.dart             |  85 ++++++++
 .../widgets/voice_mic_button.dart             | 187 ++++++++++++++++++
 6 files changed, 333 insertions(+), 25 deletions(-)
 create mode 100644 it0_app/lib/features/chat/presentation/widgets/voice_mic_button.dart

diff --git a/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart b/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart
index e2ca16f..2bed529 100644
--- a/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart
+++ b/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart
@@ -149,4 +149,27 @@ class ChatRemoteDatasource {
       },
     );
   }
+
+  /// Uploads an audio file to the voice-message endpoint.
+  /// Backend performs STT, interrupts any running task if needed, and
+  /// starts a new agent task with the transcript.
+  /// Returns { sessionId, taskId, transcript }.
+  Future<Map<String, dynamic>> sendVoiceMessage({
+    required String sessionId,
+    required String audioPath,
+    String language = 'zh',
+  }) async {
+    final formData = FormData.fromMap({
+      'audio': await MultipartFile.fromFile(
+        audioPath,
+        filename: audioPath.split('/').last,
+      ),
+      'language': language,
+    });
+    final response = await _dio.post(
+      '${ApiEndpoints.sessions}/$sessionId/voice-message',
+      data: formData,
+    );
+    return response.data as Map<String, dynamic>;
+  }
 }

diff --git a/it0_app/lib/features/chat/data/repositories/chat_repository_impl.dart b/it0_app/lib/features/chat/data/repositories/chat_repository_impl.dart
index 3eff289..f7c5211 100644
--- a/it0_app/lib/features/chat/data/repositories/chat_repository_impl.dart
+++ b/it0_app/lib/features/chat/data/repositories/chat_repository_impl.dart
@@ -82,28 +82,22 @@ class ChatRepositoryImpl implements ChatRepository {
     required String sessionId,
     required String audioPath,
   }) async* {
-    // For voice messages, we POST the audio file, then subscribe to WebSocket events
-    // similar to sendMessage. The backend handles STT + agent processing.
-    final response = await _remoteDatasource.createTask(
-      sessionId: sessionId,
-      message: '[voice_input]',
-      attachments: [{'filePath': audioPath, 'mediaType': 'audio/wav'}],
-    );
+    // Kept for interface compatibility — ChatNotifier calls sendVoiceMessage
+    // on the datasource directly (to get the transcript), then calls
+    // subscribeExistingTask. This method is not used.
+    yield* subscribeExistingTask(sessionId: sessionId, taskId: '');
+  }
 
-    final returnedSessionId = response['sessionId'] as String? ??
-        response['session_id'] as String? ??
-        sessionId;
-    final taskId = response['taskId'] as String? ??
-        response['task_id'] as String?;
-
-    // Emit the real sessionId and taskId so the notifier can capture them
-    yield SessionInfoEvent(returnedSessionId);
-    if (taskId != null) yield TaskInfoEvent(taskId);
-
-    final voiceToken = await _getAccessToken();
-    await _webSocketClient.connect('/ws/agent', token: voiceToken);
+  @override
+  Stream subscribeExistingTask({
+    required String sessionId,
+    required String taskId,
+  }) async* {
+    final token = await _getAccessToken();
+    await _webSocketClient.connect('/ws/agent', token: token);
     _webSocketClient.send({
       'event': 'subscribe_session',
-      'data': {'sessionId': returnedSessionId, 'taskId': taskId},
+      'data': {'sessionId': sessionId, 'taskId': taskId},
     });
 
     yield* _webSocketClient.messages.transform(

diff --git a/it0_app/lib/features/chat/domain/repositories/chat_repository.dart b/it0_app/lib/features/chat/domain/repositories/chat_repository.dart
index eeb694c..5efdaf2 100644
--- a/it0_app/lib/features/chat/domain/repositories/chat_repository.dart
+++ b/it0_app/lib/features/chat/domain/repositories/chat_repository.dart
@@ -33,6 +33,14 @@ abstract class ChatRepository {
     required String message,
   });
 
+  /// Subscribes to an already-running agent task's WS stream without
+  /// re-submitting a prompt. Used after a voice message upload to receive
+  /// the agent's streamed response.
+  Stream subscribeExistingTask({
+    required String sessionId,
+    required String taskId,
+  });
+
   /// Confirms a standing order draft proposed by the agent.
   Future confirmStandingOrder(
     String sessionId,

diff --git a/it0_app/lib/features/chat/presentation/pages/chat_page.dart b/it0_app/lib/features/chat/presentation/pages/chat_page.dart
index 047b087..5256488 100644
--- a/it0_app/lib/features/chat/presentation/pages/chat_page.dart
+++ b/it0_app/lib/features/chat/presentation/pages/chat_page.dart
@@ -12,6 +12,7 @@ import '../widgets/stream_text_widget.dart';
 import '../widgets/approval_action_card.dart';
 import '../widgets/conversation_drawer.dart';
 import '../../../agent_call/presentation/pages/agent_call_page.dart';
+import '../widgets/voice_mic_button.dart';
 
 // ---------------------------------------------------------------------------
 // Chat page – Timeline workflow style (inspired by Claude Code VSCode)
@@ -698,12 +699,22 @@ class _ChatPageState extends ConsumerState {
                     ],
                   )
                 else
-                  Padding(
-                    padding: const EdgeInsets.only(right: 4),
-                    child: IconButton(
-                      icon: const Icon(Icons.send, size: 20),
-                      onPressed: isAwaitingApproval ? null : _send,
-                    ),
+                  Row(
+                    mainAxisSize: MainAxisSize.min,
+                    children: [
+                      VoiceMicButton(
+                        disabled: isAwaitingApproval,
+                        onAudioReady: (path) =>
+                            ref.read(chatProvider.notifier).sendVoiceMessage(path),
+                      ),
+                      Padding(
+                        padding: const EdgeInsets.only(right: 4),
+                        child: IconButton(
+                          icon: const Icon(Icons.send, size: 20),
+                          onPressed: isAwaitingApproval ? null : _send,
+                        ),
+                      ),
+                    ],
                   ),
               ],
             ),

diff --git a/it0_app/lib/features/chat/presentation/providers/chat_providers.dart b/it0_app/lib/features/chat/presentation/providers/chat_providers.dart
index 12a698b..ae3eac7 100644
--- a/it0_app/lib/features/chat/presentation/providers/chat_providers.dart
+++ b/it0_app/lib/features/chat/presentation/providers/chat_providers.dart
@@ -488,6 +488,91 @@ class ChatNotifier extends StateNotifier {
     }
   }
 
+  /// Sends a recorded audio file as a voice message.
+  ///
+  /// Flow:
+  ///   1. Shows a temporary "识别中..." user message bubble.
+  ///   2. Uploads audio to the backend voice-message endpoint.
+  ///      Backend runs Whisper STT, optionally interrupts any running task,
+  ///      and starts a new agent task with the transcript.
+  ///   3. Replaces the placeholder with the real transcript.
+  ///   4. Subscribes to the WS stream for the new task.
+  Future<void> sendVoiceMessage(String audioPath) async {
+    final tempId = '${DateTime.now().microsecondsSinceEpoch}_voice';
+    final tempMsg = ChatMessage(
+      id: tempId,
+      role: MessageRole.user,
+      content: '🎤 识别中...',
+      timestamp: DateTime.now(),
+      type: MessageType.text,
+    );
+
+    // Cancel any ongoing subscription (voice message acts as interrupt)
+    _eventSubscription?.cancel();
+    _eventSubscription = null;
+    _flushBuffersSync();
+
+    state = state.copyWith(
+      messages: [...state.messages, tempMsg],
+      agentStatus: AgentStatus.thinking,
+      error: null,
+    );
+
+    try {
+      final datasource = _ref.read(chatRemoteDatasourceProvider);
+      final sessionId = state.sessionId ?? 'new';
+
+      final result = await datasource.sendVoiceMessage(
+        sessionId: sessionId,
+        audioPath: audioPath,
+      );
+
+      final returnedSessionId = result['sessionId'] as String? ?? sessionId;
+      final taskId = result['taskId'] as String?;
+      final transcript = result['transcript'] as String? ?? '🎤';
+
+      // Replace placeholder with real transcript
+      final updatedMessages = state.messages
+          .map((m) => m.id == tempId ? m.copyWith(content: transcript) : m)
+          .toList();
+
+      state = state.copyWith(
+        messages: updatedMessages,
+        sessionId: returnedSessionId,
+        taskId: taskId,
+      );
+
+      // Subscribe to the WS stream for the running task
+      final repo = _ref.read(chatRepositoryProvider);
+      final stream = repo.subscribeExistingTask(
+        sessionId: returnedSessionId,
+        taskId: taskId ?? '',
+      );
+
+      _eventSubscription = stream.listen(
+        (event) => _handleStreamEvent(event),
+        onError: (error) {
+          state = state.copyWith(
+            agentStatus: AgentStatus.error,
+            error: '语音消息处理失败: $error',
+          );
+        },
+        onDone: () {
+          if (state.agentStatus != AgentStatus.error) {
+            state = state.copyWith(agentStatus: AgentStatus.idle);
+          }
+        },
+      );
+    } catch (e) {
+      // Remove placeholder on failure
+      state = state.copyWith(
+        messages: state.messages.where((m) => m.id != tempId).toList(),
+        agentStatus: AgentStatus.error,
+        error: '语音识别失败: $e',
+      );
+    }
+  }
+
   Future cancelCurrentTask() async {
     final taskId = state.taskId;
     if (taskId == null && state.sessionId == null) return;

diff --git a/it0_app/lib/features/chat/presentation/widgets/voice_mic_button.dart b/it0_app/lib/features/chat/presentation/widgets/voice_mic_button.dart
new file mode 100644
index 0000000..ad07429
--- /dev/null
+++ b/it0_app/lib/features/chat/presentation/widgets/voice_mic_button.dart
@@ -0,0 +1,187 @@
+import 'dart:io';
+import 'package:flutter/material.dart';
+import 'package:path_provider/path_provider.dart';
+import 'package:record/record.dart';
+import '../../../../core/theme/app_colors.dart';
+
+/// WhatsApp-style press-and-hold mic button.
+///
+/// • Press and hold → records audio to a temp file.
+/// • Release → stops recording and calls [onAudioReady] with the file path.
+/// • Slide up while holding → cancels recording without sending.
+///
+/// Requires microphone permission (handled by the `record` package).
+class VoiceMicButton extends StatefulWidget {
+  /// Called with the temp file path when the user releases the button.
+  final void Function(String audioPath) onAudioReady;
+
+  /// Whether the button should be disabled (e.g. awaiting approval).
+  final bool disabled;
+
+  const VoiceMicButton({
+    super.key,
+    required this.onAudioReady,
+    this.disabled = false,
+  });
+
+  @override
+  State<VoiceMicButton> createState() => _VoiceMicButtonState();
+}
+
+class _VoiceMicButtonState extends State<VoiceMicButton>
+    with SingleTickerProviderStateMixin {
+  final _recorder = AudioRecorder();
+  bool _isRecording = false;
+  bool _cancelled = false;
+
+  // Slide-up cancel threshold (pixels above press origin)
+  static const double _cancelThreshold = 60.0;
+  Offset? _pressOrigin;
+
+  late final AnimationController _pulseController;
+  late final Animation<double> _pulseAnimation;
+
+  @override
+  void initState() {
+    super.initState();
+    _pulseController = AnimationController(
+      vsync: this,
+      duration: const Duration(milliseconds: 800),
+    );
+    _pulseAnimation = Tween<double>(begin: 1.0, end: 1.25).animate(
+      CurvedAnimation(parent: _pulseController, curve: Curves.easeInOut),
+    );
+  }
+
+  @override
+  void dispose() {
+    _recorder.dispose();
+    _pulseController.dispose();
+    super.dispose();
+  }
+
+  Future<void> _startRecording() async {
+    final hasPermission = await _recorder.hasPermission();
+    if (!hasPermission) {
+      if (mounted) {
+        ScaffoldMessenger.of(context).showSnackBar(
+          const SnackBar(content: Text('需要麦克风权限')),
+        );
+      }
+      return;
+    }
+
+    final dir = await getTemporaryDirectory();
+    final path = '${dir.path}/voice_${DateTime.now().millisecondsSinceEpoch}.m4a';
+
+    await _recorder.start(
+      const RecordConfig(encoder: AudioEncoder.aacLc, sampleRate: 16000),
+      path: path,
+    );
+
+    setState(() {
+      _isRecording = true;
+      _cancelled = false;
+    });
+    _pulseController.repeat(reverse: true);
+  }
+
+  Future<void> _stopRecording({required bool cancel}) async {
+    if (!_isRecording) return;
+
+    _pulseController.stop();
+    _pulseController.reset();
+
+    final path = await _recorder.stop();
+    setState(() => _isRecording = false);
+
+    if (cancel || path == null) return;
+
+    // Ignore empty recordings (< ~0.3s)
+    try {
+      final size = await File(path).length();
+      if (size < 2048) return;
+    } catch (_) {
+      return;
+    }
+
+    widget.onAudioReady(path);
+  }
+
+  void _onLongPressStart(LongPressStartDetails details) {
+    if (widget.disabled) return;
+    _pressOrigin = details.globalPosition;
+    _startRecording();
+  }
+
+  void _onLongPressMoveUpdate(LongPressMoveUpdateDetails details) {
+    if (_pressOrigin == null || !_isRecording) return;
+    final dy = _pressOrigin!.dy - details.globalPosition.dy;
+    setState(() => _cancelled = dy > _cancelThreshold);
+  }
+
+  void _onLongPressEnd(LongPressEndDetails details) {
+    _stopRecording(cancel: _cancelled);
+    _pressOrigin = null;
+  }
+
+  void _onLongPressCancel() {
+    _stopRecording(cancel: true);
+    _pressOrigin = null;
+  }
+
+  @override
+  Widget build(BuildContext context) {
+    if (_isRecording) {
+      return _buildRecordingButton();
+    }
+    return GestureDetector(
+      onLongPressStart: _onLongPressStart,
+      onLongPressMoveUpdate: _onLongPressMoveUpdate,
+      onLongPressEnd: _onLongPressEnd,
+      onLongPressCancel: _onLongPressCancel,
+      child: IconButton(
+        icon: Icon(
+          Icons.mic_none,
+          size: 22,
+          color: widget.disabled ? AppColors.textMuted : null,
+        ),
+        tooltip: '按住录音',
+        onPressed: widget.disabled ? null : () {},
+      ),
+    );
+  }
+
+  Widget _buildRecordingButton() {
+    final isCancelling = _cancelled;
+    return GestureDetector(
+      onLongPressMoveUpdate: _onLongPressMoveUpdate,
+      onLongPressEnd: _onLongPressEnd,
+      onLongPressCancel: _onLongPressCancel,
+      child: Padding(
+        padding: const EdgeInsets.symmetric(horizontal: 4, vertical: 4),
+        child: Row(
+          mainAxisSize: MainAxisSize.min,
+          children: [
+            ScaleTransition(
+              scale: _pulseAnimation,
+              child: Icon(
+                Icons.mic,
+                size: 22,
+                color: isCancelling ? AppColors.textMuted : AppColors.error,
+              ),
+            ),
+            const SizedBox(width: 4),
+            Text(
+              isCancelling ? '松开取消' : '松开发送',
+              style: TextStyle(
+                fontSize: 12,
+                color: isCancelling ? AppColors.textMuted : AppColors.error,
+              ),
+            ),
+          ],
+        ),
+      ),
+    );
+  }
+}