feat(it0_app): add WhatsApp-style voice message with async agent interrupt
New VoiceMicButton widget (press-and-hold to record, release to send):
- Records audio to a temp .m4a file via the `record` package
- Slide-up gesture cancels recording without sending
- Pulsing red mic icon + "松开发送/松开取消" feedback during recording
New flow for voice messages:
1. Temp "🎤 识别中..." bubble shown immediately
2. Audio uploaded to POST /api/v1/agent/sessions/:id/voice-message
(multipart/form-data; backend runs Whisper STT)
3. Placeholder replaced with real transcript
4. WS stream subscribed via new subscribeExistingTask() to receive
agent's streaming response — same pipeline as text chat
Voice messages act as async interrupts: if the agent is mid-task the
backend hard-cancels it before processing the new voice command,
so whoever presses the mic button always takes priority.
Files changed:
chat_remote_datasource.dart — sendVoiceMessage() multipart upload
chat_repository.dart — subscribeExistingTask() interface method
chat_repository_impl.dart — implement subscribeExistingTask(); reduce
sendVoiceMessage() to a compatibility stub that delegates to it
chat_providers.dart — ChatNotifier.sendVoiceMessage()
voice_mic_button.dart — NEW press-and-hold recording widget
chat_page.dart — mic button added to input area
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a2af76bcd7
commit
55b983a950
|
|
@ -149,4 +149,27 @@ class ChatRemoteDatasource {
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Uploads a recorded audio file to the session's voice-message endpoint.
///
/// The request body is multipart form data with two fields: `audio` (the
/// file at [audioPath]) and `language` (defaults to `'zh'`).
///
/// Per the endpoint contract, the backend performs STT, interrupts any
/// running task if needed, and starts a new agent task with the transcript.
///
/// Returns the decoded response body, expected to have the shape
/// `{ sessionId, taskId, transcript }`.
Future<Map<String, dynamic>> sendVoiceMessage({
  required String sessionId,
  required String audioPath,
  String language = 'zh',
}) async {
  // Everything after the last '/' becomes the upload filename; a path
  // without any '/' is used whole.
  final uploadName = audioPath.substring(audioPath.lastIndexOf('/') + 1);
  final audioPart = await MultipartFile.fromFile(
    audioPath,
    filename: uploadName,
  );

  final payload = FormData.fromMap({
    'audio': audioPart,
    'language': language,
  });
  final endpoint = '${ApiEndpoints.sessions}/$sessionId/voice-message';

  final response = await _dio.post(endpoint, data: payload);
  return response.data as Map<String, dynamic>;
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -82,28 +82,22 @@ class ChatRepositoryImpl implements ChatRepository {
|
||||||
required String sessionId,
|
required String sessionId,
|
||||||
required String audioPath,
|
required String audioPath,
|
||||||
}) async* {
|
}) async* {
|
||||||
// For voice messages, we POST the audio file, then subscribe to WebSocket events
|
// Kept for interface compatibility — ChatNotifier calls sendVoiceMessage
|
||||||
// similar to sendMessage. The backend handles STT + agent processing.
|
// on the datasource directly (to get the transcript), then calls
|
||||||
final response = await _remoteDatasource.createTask(
|
// subscribeExistingTask. This method is not used.
|
||||||
sessionId: sessionId,
|
yield* subscribeExistingTask(sessionId: sessionId, taskId: '');
|
||||||
message: '[voice_input]',
|
}
|
||||||
attachments: [{'filePath': audioPath, 'mediaType': 'audio/wav'}],
|
|
||||||
);
|
|
||||||
|
|
||||||
final returnedSessionId = response['sessionId'] as String? ??
|
@override
|
||||||
response['session_id'] as String? ??
|
Stream<StreamEvent> subscribeExistingTask({
|
||||||
sessionId;
|
required String sessionId,
|
||||||
final taskId = response['taskId'] as String? ?? response['task_id'] as String?;
|
required String taskId,
|
||||||
|
}) async* {
|
||||||
// Emit the real sessionId and taskId so the notifier can capture them
|
final token = await _getAccessToken();
|
||||||
yield SessionInfoEvent(returnedSessionId);
|
await _webSocketClient.connect('/ws/agent', token: token);
|
||||||
if (taskId != null) yield TaskInfoEvent(taskId);
|
|
||||||
|
|
||||||
final voiceToken = await _getAccessToken();
|
|
||||||
await _webSocketClient.connect('/ws/agent', token: voiceToken);
|
|
||||||
_webSocketClient.send({
|
_webSocketClient.send({
|
||||||
'event': 'subscribe_session',
|
'event': 'subscribe_session',
|
||||||
'data': {'sessionId': returnedSessionId, 'taskId': taskId},
|
'data': {'sessionId': sessionId, 'taskId': taskId},
|
||||||
});
|
});
|
||||||
|
|
||||||
yield* _webSocketClient.messages.transform(
|
yield* _webSocketClient.messages.transform(
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,14 @@ abstract class ChatRepository {
|
||||||
required String message,
|
required String message,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/// Attaches to the WS event stream of an agent task that is already
/// running, without submitting a new prompt.
///
/// Intended for use after a voice message upload, so that the agent's
/// streamed response for the task identified by [sessionId] and [taskId]
/// can be received.
Stream<StreamEvent> subscribeExistingTask({
  required String sessionId,
  required String taskId,
});
|
||||||
|
|
||||||
/// Confirms a standing order draft proposed by the agent.
|
/// Confirms a standing order draft proposed by the agent.
|
||||||
Future<void> confirmStandingOrder(
|
Future<void> confirmStandingOrder(
|
||||||
String sessionId,
|
String sessionId,
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import '../widgets/stream_text_widget.dart';
|
||||||
import '../widgets/approval_action_card.dart';
|
import '../widgets/approval_action_card.dart';
|
||||||
import '../widgets/conversation_drawer.dart';
|
import '../widgets/conversation_drawer.dart';
|
||||||
import '../../../agent_call/presentation/pages/agent_call_page.dart';
|
import '../../../agent_call/presentation/pages/agent_call_page.dart';
|
||||||
|
import '../widgets/voice_mic_button.dart';
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Chat page – Timeline workflow style (inspired by Claude Code VSCode)
|
// Chat page – Timeline workflow style (inspired by Claude Code VSCode)
|
||||||
|
|
@ -698,12 +699,22 @@ class _ChatPageState extends ConsumerState<ChatPage> {
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
else
|
else
|
||||||
Padding(
|
Row(
|
||||||
padding: const EdgeInsets.only(right: 4),
|
mainAxisSize: MainAxisSize.min,
|
||||||
child: IconButton(
|
children: [
|
||||||
icon: const Icon(Icons.send, size: 20),
|
VoiceMicButton(
|
||||||
onPressed: isAwaitingApproval ? null : _send,
|
disabled: isAwaitingApproval,
|
||||||
),
|
onAudioReady: (path) =>
|
||||||
|
ref.read(chatProvider.notifier).sendVoiceMessage(path),
|
||||||
|
),
|
||||||
|
Padding(
|
||||||
|
padding: const EdgeInsets.only(right: 4),
|
||||||
|
child: IconButton(
|
||||||
|
icon: const Icon(Icons.send, size: 20),
|
||||||
|
onPressed: isAwaitingApproval ? null : _send,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
],
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -488,6 +488,91 @@ class ChatNotifier extends StateNotifier<ChatState> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sends a recorded audio file as a voice message.
///
/// Flow:
/// 1. Appends a temporary "识别中..." user bubble and switches the agent
///    status to thinking.
/// 2. Uploads the audio at [audioPath] to the backend voice-message
///    endpoint. Per the endpoint contract the backend runs STT, optionally
///    interrupts any running task, and starts a new agent task with the
///    transcript.
/// 3. Replaces the placeholder bubble with the returned transcript (or a
///    mic glyph when the transcript is missing or blank).
/// 4. Subscribes to the WS stream of the new task and routes its events
///    through the regular stream-event handler.
///
/// On failure the placeholder bubble is removed and the state is put into
/// the error status with a message. Nothing is written to the state once
/// the notifier has been disposed.
Future<void> sendVoiceMessage(String audioPath) async {
  final tempId = '${DateTime.now().microsecondsSinceEpoch}_voice';
  final tempMsg = ChatMessage(
    id: tempId,
    role: MessageRole.user,
    content: '🎤 识别中...',
    timestamp: DateTime.now(),
    type: MessageType.text,
  );

  // Cancel any ongoing subscription (voice message acts as interrupt).
  _eventSubscription?.cancel();
  _eventSubscription = null;
  _flushBuffersSync();

  state = state.copyWith(
    messages: [...state.messages, tempMsg],
    agentStatus: AgentStatus.thinking,
    error: null,
  );

  try {
    final datasource = _ref.read(chatRemoteDatasourceProvider);
    final sessionId = state.sessionId ?? 'new';

    final result = await datasource.sendVoiceMessage(
      sessionId: sessionId,
      audioPath: audioPath,
    );

    // The notifier may have been disposed while the upload was in flight;
    // assigning `state` on a disposed StateNotifier throws.
    if (!mounted) return;

    final returnedSessionId = result['sessionId'] as String? ?? sessionId;
    final taskId = result['taskId'] as String?;
    final rawTranscript = result['transcript'] as String?;
    // A blank transcript would render as an empty bubble, so fall back to
    // the mic glyph in that case as well as when the field is absent.
    final transcript = (rawTranscript == null || rawTranscript.trim().isEmpty)
        ? '🎤'
        : rawTranscript;

    // Replace placeholder with real transcript.
    final updatedMessages = [
      for (final m in state.messages)
        if (m.id == tempId) m.copyWith(content: transcript) else m,
    ];

    state = state.copyWith(
      messages: updatedMessages,
      sessionId: returnedSessionId,
      taskId: taskId,
    );

    // Subscribe to the WS stream for the running task.
    final repo = _ref.read(chatRepositoryProvider);
    final stream = repo.subscribeExistingTask(
      sessionId: returnedSessionId,
      taskId: taskId ?? '',
    );

    // Another request may have subscribed while this upload was awaited;
    // cancel it so its listener is not orphaned when the field is replaced.
    _eventSubscription?.cancel();
    _eventSubscription = stream.listen(
      (event) => _handleStreamEvent(event),
      onError: (error) {
        if (!mounted) return;
        state = state.copyWith(
          agentStatus: AgentStatus.error,
          error: '语音消息处理失败: $error',
        );
      },
      onDone: () {
        if (!mounted) return;
        if (state.agentStatus != AgentStatus.error) {
          state = state.copyWith(agentStatus: AgentStatus.idle);
        }
      },
    );
  } catch (e) {
    // Without this guard a disposed notifier would throw again from here,
    // this time as an unhandled async error.
    if (!mounted) return;

    // Remove placeholder on failure.
    state = state.copyWith(
      messages: state.messages.where((m) => m.id != tempId).toList(),
      agentStatus: AgentStatus.error,
      error: '语音识别失败: $e',
    );
  }
}
|
||||||
|
|
||||||
Future<void> cancelCurrentTask() async {
|
Future<void> cancelCurrentTask() async {
|
||||||
final taskId = state.taskId;
|
final taskId = state.taskId;
|
||||||
if (taskId == null && state.sessionId == null) return;
|
if (taskId == null && state.sessionId == null) return;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,187 @@
|
||||||
|
import 'dart:io';
|
||||||
|
import 'package:flutter/material.dart';
|
||||||
|
import 'package:path_provider/path_provider.dart';
|
||||||
|
import 'package:record/record.dart';
|
||||||
|
import '../../../../core/theme/app_colors.dart';
|
||||||
|
|
||||||
|
/// A press-and-hold microphone button in the style of WhatsApp voice notes.
///
/// Gesture contract:
/// * Long-press and hold — audio is recorded to a temporary file.
/// * Release — recording stops and [onAudioReady] receives the file path.
/// * Slide up while holding — the recording is cancelled and nothing is
///   sent.
///
/// Requires microphone permission (handled by the `record` package).
class VoiceMicButton extends StatefulWidget {
  const VoiceMicButton({
    super.key,
    required this.onAudioReady,
    this.disabled = false,
  });

  /// Whether the button should be disabled (e.g. awaiting approval).
  final bool disabled;

  /// Invoked with the temp file path when the user releases the button.
  final void Function(String audioPath) onAudioReady;

  @override
  State<VoiceMicButton> createState() => _VoiceMicButtonState();
}
|
||||||
|
|
||||||
|
/// State for [VoiceMicButton]: owns the audio recorder, tracks the
/// long-press gesture, and drives the pulsing mic animation shown while a
/// recording is in progress.
class _VoiceMicButtonState extends State<VoiceMicButton>
    with SingleTickerProviderStateMixin {
  /// Slide-up cancel threshold (pixels above the press origin).
  static const double _cancelThreshold = 60.0;

  /// Recordings smaller than this many bytes are discarded as accidental
  /// taps instead of being sent.
  static const int _minRecordingBytes = 2048;

  final _recorder = AudioRecorder();

  /// True once the recorder has started and the recording UI is shown.
  bool _isRecording = false;

  /// True while the finger is far enough above the origin to cancel.
  bool _cancelled = false;

  /// True from long-press start until the press ends or is cancelled.
  ///
  /// Starting the recorder is asynchronous, so the press can end before the
  /// recorder is running; this flag lets the start path notice that.
  bool _pressActive = false;

  /// Guards against overlapping start attempts.
  bool _isStarting = false;

  /// Guards against overlapping stop attempts (end and cancel can both
  /// fire for one gesture).
  bool _isStopping = false;

  Offset? _pressOrigin;

  late final AnimationController _pulseController;
  late final Animation<double> _pulseAnimation;

  @override
  void initState() {
    super.initState();
    _pulseController = AnimationController(
      vsync: this,
      duration: const Duration(milliseconds: 800),
    );
    _pulseAnimation = Tween<double>(begin: 1.0, end: 1.25).animate(
      CurvedAnimation(parent: _pulseController, curve: Curves.easeInOut),
    );
  }

  @override
  void dispose() {
    // Make any in-flight start attempt bail out instead of touching state.
    _pressActive = false;
    _recorder.dispose();
    _pulseController.dispose();
    super.dispose();
  }

  /// Checks microphone permission and starts recording to a fresh temp
  /// `.m4a` file.
  ///
  /// Every await is followed by a liveness check: if the widget was
  /// disposed or the press already ended (for example because a permission
  /// prompt interrupted the gesture), the recording is not started, or is
  /// stopped and discarded right away, so the recorder never keeps running
  /// without a finger on the button.
  Future<void> _startRecording() async {
    if (_isStarting || _isRecording) return;
    _isStarting = true;
    try {
      final hasPermission = await _recorder.hasPermission();
      if (!mounted) return;
      if (!hasPermission) {
        ScaffoldMessenger.of(context).showSnackBar(
          const SnackBar(content: Text('需要麦克风权限')),
        );
        return;
      }
      if (!_pressActive) return;

      final dir = await getTemporaryDirectory();
      if (!mounted || !_pressActive) return;
      final path =
          '${dir.path}/voice_${DateTime.now().millisecondsSinceEpoch}.m4a';

      await _recorder.start(
        const RecordConfig(encoder: AudioEncoder.aacLc, sampleRate: 16000),
        path: path,
      );
      // If the widget went away meanwhile, dispose() has already asked the
      // recorder to shut down; there is no UI left to update.
      if (!mounted) return;

      if (!_pressActive) {
        // The press ended while the recorder was starting: _stopRecording
        // was a no-op at that point, so stop and discard here.
        final abandoned = await _recorder.stop();
        await _deleteQuietly(abandoned ?? path);
        return;
      }

      setState(() {
        _isRecording = true;
        _cancelled = false;
      });
      _pulseController.repeat(reverse: true);
    } on Exception catch (e) {
      // This future is not awaited by the gesture callback, so an escaping
      // exception would surface as an unhandled async error.
      debugPrint('VoiceMicButton: could not start recording: $e');
    } finally {
      _isStarting = false;
    }
  }

  /// Stops the active recording and either delivers or discards the file.
  ///
  /// The file is discarded when [cancel] is true, when it is smaller than
  /// [_minRecordingBytes], or when the widget was disposed in the meantime;
  /// otherwise its path is passed to [VoiceMicButton.onAudioReady].
  Future<void> _stopRecording({required bool cancel}) async {
    if (!_isRecording || _isStopping) return;
    _isStopping = true;

    _pulseController
      ..stop()
      ..reset();

    String? path;
    try {
      path = await _recorder.stop();
    } on Exception catch (e) {
      // Fall through with a null path so the UI still leaves recording mode.
      debugPrint('VoiceMicButton: could not stop recording: $e');
    } finally {
      _isStopping = false;
    }

    if (!mounted) {
      _isRecording = false;
      if (path != null) await _deleteQuietly(path);
      return;
    }
    setState(() => _isRecording = false);

    if (path == null) return;

    // Cancelled or too-short recordings are never sent, so remove the file
    // rather than leaving it in the temp directory.
    if (cancel || !await _isLongEnough(path)) {
      await _deleteQuietly(path);
      return;
    }

    if (!mounted) {
      await _deleteQuietly(path);
      return;
    }
    widget.onAudioReady(path);
  }

  /// Whether the file at [path] is at least [_minRecordingBytes] long.
  ///
  /// A file whose size cannot be read is treated as too short.
  Future<bool> _isLongEnough(String path) async {
    try {
      return await File(path).length() >= _minRecordingBytes;
    } on Exception {
      return false;
    }
  }

  /// Best-effort removal of a temp recording; failures are ignored because
  /// the file lives in the temporary directory anyway.
  Future<void> _deleteQuietly(String path) async {
    try {
      final file = File(path);
      if (await file.exists()) await file.delete();
    } on FileSystemException {
      // Nothing useful to do if the temp file cannot be removed.
    }
  }

  void _onLongPressStart(LongPressStartDetails details) {
    if (widget.disabled) return;
    _pressOrigin = details.globalPosition;
    _pressActive = true;
    _startRecording();
  }

  void _onLongPressMoveUpdate(LongPressMoveUpdateDetails details) {
    final origin = _pressOrigin;
    if (origin == null || !_isRecording) return;
    // Positive when the finger has moved up from where the press began.
    final dy = origin.dy - details.globalPosition.dy;
    setState(() => _cancelled = dy > _cancelThreshold);
  }

  void _onLongPressEnd(LongPressEndDetails details) {
    _pressActive = false;
    _stopRecording(cancel: _cancelled);
    _pressOrigin = null;
  }

  void _onLongPressCancel() {
    _pressActive = false;
    _stopRecording(cancel: true);
    _pressOrigin = null;
  }

  @override
  Widget build(BuildContext context) {
    if (_isRecording) {
      return _buildRecordingButton();
    }
    return GestureDetector(
      onLongPressStart: _onLongPressStart,
      onLongPressMoveUpdate: _onLongPressMoveUpdate,
      onLongPressEnd: _onLongPressEnd,
      onLongPressCancel: _onLongPressCancel,
      child: IconButton(
        icon: Icon(
          Icons.mic_none,
          size: 22,
          color: widget.disabled ? AppColors.textMuted : null,
        ),
        tooltip: '按住录音',
        // A no-op handler keeps the button visually enabled; recording is
        // driven by the long-press callbacks above.
        onPressed: widget.disabled ? null : () {},
      ),
    );
  }

  /// Builds the in-progress view: a pulsing mic icon plus a hint telling
  /// the user whether releasing now will send or cancel.
  Widget _buildRecordingButton() {
    final isCancelling = _cancelled;
    final tint = isCancelling ? AppColors.textMuted : AppColors.error;
    return GestureDetector(
      onLongPressMoveUpdate: _onLongPressMoveUpdate,
      onLongPressEnd: _onLongPressEnd,
      onLongPressCancel: _onLongPressCancel,
      child: Padding(
        padding: const EdgeInsets.symmetric(horizontal: 4, vertical: 4),
        child: Row(
          mainAxisSize: MainAxisSize.min,
          children: [
            ScaleTransition(
              scale: _pulseAnimation,
              child: Icon(Icons.mic, size: 22, color: tint),
            ),
            const SizedBox(width: 4),
            Text(
              isCancelling ? '松开取消' : '松开发送',
              style: TextStyle(fontSize: 12, color: tint),
            ),
          ],
        ),
      ),
    );
  }
}
|
||||||
Loading…
Reference in New Issue