feat(it0_app): add WhatsApp-style voice message with async agent interrupt
New VoiceMicButton widget (press-and-hold to record, release to send):
- Records audio to a temp .m4a file via the `record` package
- Slide-up gesture cancels recording without sending
- Pulsing red mic icon + "松开发送/松开取消" feedback during recording
New flow for voice messages:
1. Temp "🎤 识别中..." bubble shown immediately
2. Audio uploaded to POST /api/v1/agent/sessions/:id/voice-message
(multipart/form-data; backend runs Whisper STT)
3. Placeholder replaced with real transcript
4. WS stream subscribed via new subscribeExistingTask() to receive
agent's streaming response — same pipeline as text chat
Voice messages act as async interrupts: if the agent is mid-task the
backend hard-cancels it before processing the new voice command,
so whoever presses the mic button always takes priority.
Files changed:
chat_remote_datasource.dart — sendVoiceMessage() multipart upload
chat_repository.dart — subscribeExistingTask() interface method
chat_repository_impl.dart — implement subscribeExistingTask(); fix
sendVoiceMessage() stub
chat_providers.dart — ChatNotifier.sendVoiceMessage()
voice_mic_button.dart — NEW press-and-hold recording widget
chat_page.dart — mic button added to input area
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a2af76bcd7
commit
55b983a950
|
|
@ -149,4 +149,27 @@ class ChatRemoteDatasource {
|
|||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Uploads a recorded audio file for server-side speech-to-text.
///
/// The backend transcribes the audio (Whisper), interrupts any task that
/// is currently running for the session if needed, and starts a new agent
/// task from the transcript.
///
/// Returns the decoded JSON body: `{ sessionId, taskId, transcript }`.
Future<Map<String, dynamic>> sendVoiceMessage({
  required String sessionId,
  required String audioPath,
  String language = 'zh',
}) async {
  // Use the last path segment as the upload filename.
  final fileName = audioPath.split('/').last;
  final audioPart =
      await MultipartFile.fromFile(audioPath, filename: fileName);

  final response = await _dio.post(
    '${ApiEndpoints.sessions}/$sessionId/voice-message',
    data: FormData.fromMap({
      'audio': audioPart,
      'language': language,
    }),
  );
  return response.data as Map<String, dynamic>;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,28 +82,22 @@ class ChatRepositoryImpl implements ChatRepository {
|
|||
required String sessionId,
|
||||
required String audioPath,
|
||||
}) async* {
|
||||
// For voice messages, we POST the audio file, then subscribe to WebSocket events
|
||||
// similar to sendMessage. The backend handles STT + agent processing.
|
||||
final response = await _remoteDatasource.createTask(
|
||||
sessionId: sessionId,
|
||||
message: '[voice_input]',
|
||||
attachments: [{'filePath': audioPath, 'mediaType': 'audio/wav'}],
|
||||
);
|
||||
// Kept for interface compatibility — ChatNotifier calls sendVoiceMessage
|
||||
// on the datasource directly (to get the transcript), then calls
|
||||
// subscribeExistingTask. This method is not used.
|
||||
yield* subscribeExistingTask(sessionId: sessionId, taskId: '');
|
||||
}
|
||||
|
||||
final returnedSessionId = response['sessionId'] as String? ??
|
||||
response['session_id'] as String? ??
|
||||
sessionId;
|
||||
final taskId = response['taskId'] as String? ?? response['task_id'] as String?;
|
||||
|
||||
// Emit the real sessionId and taskId so the notifier can capture them
|
||||
yield SessionInfoEvent(returnedSessionId);
|
||||
if (taskId != null) yield TaskInfoEvent(taskId);
|
||||
|
||||
final voiceToken = await _getAccessToken();
|
||||
await _webSocketClient.connect('/ws/agent', token: voiceToken);
|
||||
@override
|
||||
Stream<StreamEvent> subscribeExistingTask({
|
||||
required String sessionId,
|
||||
required String taskId,
|
||||
}) async* {
|
||||
final token = await _getAccessToken();
|
||||
await _webSocketClient.connect('/ws/agent', token: token);
|
||||
_webSocketClient.send({
|
||||
'event': 'subscribe_session',
|
||||
'data': {'sessionId': returnedSessionId, 'taskId': taskId},
|
||||
'data': {'sessionId': sessionId, 'taskId': taskId},
|
||||
});
|
||||
|
||||
yield* _webSocketClient.messages.transform(
|
||||
|
|
|
|||
|
|
@ -33,6 +33,14 @@ abstract class ChatRepository {
|
|||
required String message,
|
||||
});
|
||||
|
||||
/// Subscribes to an already-running agent task's WS stream without
|
||||
/// re-submitting a prompt. Used after a voice message upload to receive
|
||||
/// the agent's streamed response.
|
||||
Stream<StreamEvent> subscribeExistingTask({
|
||||
required String sessionId,
|
||||
required String taskId,
|
||||
});
|
||||
|
||||
/// Confirms a standing order draft proposed by the agent.
|
||||
Future<void> confirmStandingOrder(
|
||||
String sessionId,
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import '../widgets/stream_text_widget.dart';
|
|||
import '../widgets/approval_action_card.dart';
|
||||
import '../widgets/conversation_drawer.dart';
|
||||
import '../../../agent_call/presentation/pages/agent_call_page.dart';
|
||||
import '../widgets/voice_mic_button.dart';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Chat page – Timeline workflow style (inspired by Claude Code VSCode)
|
||||
|
|
@ -698,6 +699,14 @@ class _ChatPageState extends ConsumerState<ChatPage> {
|
|||
],
|
||||
)
|
||||
else
|
||||
Row(
|
||||
mainAxisSize: MainAxisSize.min,
|
||||
children: [
|
||||
VoiceMicButton(
|
||||
disabled: isAwaitingApproval,
|
||||
onAudioReady: (path) =>
|
||||
ref.read(chatProvider.notifier).sendVoiceMessage(path),
|
||||
),
|
||||
Padding(
|
||||
padding: const EdgeInsets.only(right: 4),
|
||||
child: IconButton(
|
||||
|
|
@ -709,6 +718,8 @@ class _ChatPageState extends ConsumerState<ChatPage> {
|
|||
),
|
||||
],
|
||||
),
|
||||
],
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -488,6 +488,91 @@ class ChatNotifier extends StateNotifier<ChatState> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Sends a recorded audio file as a voice message.
///
/// Flow:
/// 1. Shows a temporary "识别中..." user message bubble.
/// 2. Uploads audio to the backend voice-message endpoint.
///    Backend runs Whisper STT, optionally interrupts any running task,
///    and starts a new agent task with the transcript.
/// 3. Replaces the placeholder with the real transcript.
/// 4. Subscribes to the WS stream for the new task.
///
/// Acts as an async interrupt: any in-flight event subscription is
/// cancelled before the upload so the voice command takes priority.
Future<void> sendVoiceMessage(String audioPath) async {
  final tempId = '${DateTime.now().microsecondsSinceEpoch}_voice';
  final tempMsg = ChatMessage(
    id: tempId,
    role: MessageRole.user,
    content: '🎤 识别中...',
    timestamp: DateTime.now(),
    type: MessageType.text,
  );

  // Cancel any ongoing subscription (voice message acts as interrupt).
  _eventSubscription?.cancel();
  _eventSubscription = null;
  _flushBuffersSync();

  state = state.copyWith(
    messages: [...state.messages, tempMsg],
    agentStatus: AgentStatus.thinking,
    error: null,
  );

  try {
    final datasource = _ref.read(chatRemoteDatasourceProvider);
    final sessionId = state.sessionId ?? 'new';

    final result = await datasource.sendVoiceMessage(
      sessionId: sessionId,
      audioPath: audioPath,
    );

    // Accept both camelCase and snake_case keys, matching the defensive
    // fallbacks ChatRepositoryImpl uses for the same backend responses.
    final returnedSessionId = result['sessionId'] as String? ??
        result['session_id'] as String? ??
        sessionId;
    final taskId =
        result['taskId'] as String? ?? result['task_id'] as String?;
    final transcript = result['transcript'] as String? ?? '🎤';

    // Replace placeholder with the real transcript.
    final updatedMessages = state.messages
        .map((m) => m.id == tempId ? m.copyWith(content: transcript) : m)
        .toList();

    state = state.copyWith(
      messages: updatedMessages,
      sessionId: returnedSessionId,
      taskId: taskId,
    );

    // Subscribe to the WS stream for the running task.
    final repo = _ref.read(chatRepositoryProvider);
    final stream = repo.subscribeExistingTask(
      sessionId: returnedSessionId,
      taskId: taskId ?? '',
    );

    _eventSubscription = stream.listen(
      _handleStreamEvent, // tear-off instead of a wrapping closure
      onError: (error) {
        state = state.copyWith(
          agentStatus: AgentStatus.error,
          error: '语音消息处理失败: $error',
        );
      },
      onDone: () {
        if (state.agentStatus != AgentStatus.error) {
          state = state.copyWith(agentStatus: AgentStatus.idle);
        }
      },
    );
  } catch (e) {
    // Remove placeholder on failure so a stale "识别中..." bubble
    // doesn't linger in the transcript.
    state = state.copyWith(
      messages: state.messages.where((m) => m.id != tempId).toList(),
      agentStatus: AgentStatus.error,
      error: '语音识别失败: $e',
    );
  }
}
|
||||
|
||||
Future<void> cancelCurrentTask() async {
|
||||
final taskId = state.taskId;
|
||||
if (taskId == null && state.sessionId == null) return;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,187 @@
|
|||
import 'dart:io';
|
||||
import 'package:flutter/material.dart';
|
||||
import 'package:path_provider/path_provider.dart';
|
||||
import 'package:record/record.dart';
|
||||
import '../../../../core/theme/app_colors.dart';
|
||||
|
||||
/// WhatsApp-style press-and-hold mic button.
///
/// Gesture behaviour:
/// • Hold the button down → audio is recorded into a temp file.
/// • Let go → recording stops and the file path is delivered via
///   [onAudioReady].
/// • Drag upward before releasing → the recording is discarded.
///
/// Microphone permission is requested through the `record` package.
class VoiceMicButton extends StatefulWidget {
  const VoiceMicButton({
    super.key,
    required this.onAudioReady,
    this.disabled = false,
  });

  /// Invoked with the temp file path once the user releases the button
  /// without cancelling.
  final void Function(String audioPath) onAudioReady;

  /// Disables recording entirely (e.g. while awaiting an approval).
  final bool disabled;

  @override
  State<VoiceMicButton> createState() => _VoiceMicButtonState();
}
|
||||
|
||||
class _VoiceMicButtonState extends State<VoiceMicButton>
    with SingleTickerProviderStateMixin {
  final _recorder = AudioRecorder();
  bool _isRecording = false;
  bool _cancelled = false;

  // Slide-up cancel threshold (pixels above the press origin).
  static const double _cancelThreshold = 60.0;
  Offset? _pressOrigin;

  late final AnimationController _pulseController;
  late final Animation<double> _pulseAnimation;

  @override
  void initState() {
    super.initState();
    _pulseController = AnimationController(
      vsync: this,
      duration: const Duration(milliseconds: 800),
    );
    _pulseAnimation = Tween<double>(begin: 1.0, end: 1.25).animate(
      CurvedAnimation(parent: _pulseController, curve: Curves.easeInOut),
    );
  }

  @override
  void dispose() {
    _recorder.dispose();
    _pulseController.dispose();
    super.dispose();
  }

  /// Requests permission, starts recording to a temp .m4a file, and kicks
  /// off the pulse animation.
  Future<void> _startRecording() async {
    final hasPermission = await _recorder.hasPermission();
    // Fix: the widget may have been removed while the permission dialog
    // was up — bail out before touching context/state.
    if (!mounted) return;
    if (!hasPermission) {
      ScaffoldMessenger.of(context).showSnackBar(
        const SnackBar(content: Text('需要麦克风权限')),
      );
      return;
    }

    final dir = await getTemporaryDirectory();
    final path =
        '${dir.path}/voice_${DateTime.now().millisecondsSinceEpoch}.m4a';

    await _recorder.start(
      const RecordConfig(encoder: AudioEncoder.aacLc, sampleRate: 16000),
      path: path,
    );
    // Fix: avoid setState / use of a disposed AnimationController if the
    // widget went away during the async recorder start. dispose() already
    // released the recorder, so there is nothing further to clean up here.
    if (!mounted) return;

    setState(() {
      _isRecording = true;
      _cancelled = false;
    });
    _pulseController.repeat(reverse: true);
  }

  /// Stops the recorder; delivers the file to [VoiceMicButton.onAudioReady]
  /// unless [cancel] is set or the recording is too short to be useful.
  Future<void> _stopRecording({required bool cancel}) async {
    if (!_isRecording) return;

    _pulseController.stop();
    _pulseController.reset();

    final path = await _recorder.stop();
    // Fix: guard setState after the await; still deliver the audio below
    // even if the widget unmounted, since the callback targets the notifier.
    if (mounted) setState(() => _isRecording = false);

    if (cancel || path == null) return;

    // Ignore empty recordings (< ~0.3s).
    try {
      final size = await File(path).length();
      if (size < 2048) return;
    } catch (_) {
      // Unreadable/missing file — treat as nothing recorded.
      return;
    }

    widget.onAudioReady(path);
  }

  void _onLongPressStart(LongPressStartDetails details) {
    if (widget.disabled) return;
    _pressOrigin = details.globalPosition;
    _startRecording();
  }

  void _onLongPressMoveUpdate(LongPressMoveUpdateDetails details) {
    if (_pressOrigin == null || !_isRecording) return;
    // Positive dy = finger moved up from where the press began.
    final dy = _pressOrigin!.dy - details.globalPosition.dy;
    setState(() => _cancelled = dy > _cancelThreshold);
  }

  void _onLongPressEnd(LongPressEndDetails details) {
    _stopRecording(cancel: _cancelled);
    _pressOrigin = null;
  }

  void _onLongPressCancel() {
    _stopRecording(cancel: true);
    _pressOrigin = null;
  }

  @override
  Widget build(BuildContext context) {
    if (_isRecording) {
      return _buildRecordingButton();
    }
    return GestureDetector(
      onLongPressStart: _onLongPressStart,
      onLongPressMoveUpdate: _onLongPressMoveUpdate,
      onLongPressEnd: _onLongPressEnd,
      onLongPressCancel: _onLongPressCancel,
      child: IconButton(
        icon: Icon(
          Icons.mic_none,
          size: 22,
          color: widget.disabled ? AppColors.textMuted : null,
        ),
        tooltip: '按住录音',
        // Non-null so the button renders enabled; the actual work happens
        // in the long-press callbacks above.
        onPressed: widget.disabled ? null : () {},
      ),
    );
  }

  /// Pulsing red mic + "松开发送/松开取消" hint shown while recording.
  Widget _buildRecordingButton() {
    final isCancelling = _cancelled;
    return GestureDetector(
      onLongPressMoveUpdate: _onLongPressMoveUpdate,
      onLongPressEnd: _onLongPressEnd,
      onLongPressCancel: _onLongPressCancel,
      child: Padding(
        padding: const EdgeInsets.symmetric(horizontal: 4, vertical: 4),
        child: Row(
          mainAxisSize: MainAxisSize.min,
          children: [
            ScaleTransition(
              scale: _pulseAnimation,
              child: Icon(
                Icons.mic,
                size: 22,
                color: isCancelling ? AppColors.textMuted : AppColors.error,
              ),
            ),
            const SizedBox(width: 4),
            Text(
              isCancelling ? '松开取消' : '松开发送',
              style: TextStyle(
                fontSize: 12,
                color: isCancelling ? AppColors.textMuted : AppColors.error,
              ),
            ),
          ],
        ),
      ),
    );
  }
}
|
||||
Loading…
Reference in New Issue