diff --git a/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart b/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart index 12a4b0d..425456e 100644 --- a/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart +++ b/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart @@ -1,12 +1,17 @@ import 'dart:async'; +import 'dart:convert'; +import 'dart:io'; import 'dart:math'; import 'package:flutter/material.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:livekit_client/livekit_client.dart'; +import 'package:image_picker/image_picker.dart'; +import 'package:file_picker/file_picker.dart'; import '../../../../core/config/api_endpoints.dart'; import '../../../../core/config/app_config.dart'; import '../../../../core/network/dio_client.dart'; import '../../../../core/theme/app_colors.dart'; +import '../../../chat/domain/entities/chat_message.dart'; import '../../../settings/presentation/providers/settings_providers.dart'; /// Tracks the current state of the voice call. @@ -49,6 +54,12 @@ class _AgentCallPageState extends ConsumerState // Prevent double-actions bool _userEndedCall = false; + // Text input (mixed-mode: text + attachments during voice call) + final _textController = TextEditingController(); + bool _isInputExpanded = false; + bool _isSending = false; + final List _pendingAttachments = []; + @override void initState() { super.initState(); @@ -247,6 +258,238 @@ class _AgentCallPageState extends ConsumerState setState(() {}); } + // --------------------------------------------------------------------------- + // Text input + attachments (mixed-mode during voice call) + // --------------------------------------------------------------------------- + + static const _maxAttachments = 5; + + Future _sendTextMessage() async { + final text = _textController.text.trim(); + if (text.isEmpty && _pendingAttachments.isEmpty) return; + if (_room?.localParticipant == null) return; + + setState(() => _isSending = true); + + try { + final payload = { + 'type': 'text_inject', + 'text': text, + if (_pendingAttachments.isNotEmpty) + 'attachments': _pendingAttachments.map((a) => a.toJson()).toList(), + }; + final jsonStr = jsonEncode(payload); + final bytes = utf8.encode(jsonStr); + + // Check size limit (200KB for LiveKit data channel) + if (bytes.length > 200 * 1024) { + if (mounted) { + ScaffoldMessenger.of(context).showSnackBar( + const SnackBar(content: Text('消息太大,请减少附件数量或大小')), + ); + } + return; + } + + await _room!.localParticipant!.publishData( + bytes, + reliable: true, + topic: 'text_inject', + ); + + _textController.clear(); + setState(() => _pendingAttachments.clear()); + + if (mounted) { + ScaffoldMessenger.of(context).showSnackBar( + const SnackBar( + content: Text('已发送'), + duration: Duration(seconds: 1), + ), + ); + } + } catch (e) { + if (mounted) { + ScaffoldMessenger.of(context).showSnackBar( + SnackBar(content: Text('发送失败: $e')), + ); + } + } finally { + if (mounted) setState(() => _isSending = false); + } + } + + void _showAttachmentOptions() { + showModalBottomSheet( + context: context, + builder: (ctx) => SafeArea( + child: Column( + mainAxisSize: MainAxisSize.min, + children: [ + ListTile( + leading: const Icon(Icons.photo_library), + title: const Text('从相册选择'), + subtitle: const Text('支持多选'), + onTap: () { Navigator.pop(ctx); _pickMultipleImages(); }, + ), + ListTile( + leading: const Icon(Icons.camera_alt), + title: const Text('拍照'), + onTap: () { Navigator.pop(ctx); _pickImage(ImageSource.camera); }, + ), + ListTile( + leading: const Icon(Icons.attach_file), + title: const Text('选择文件'), + subtitle: const Text('图片、PDF'), + onTap: () { Navigator.pop(ctx); _pickFile(); }, + ), + ], + ), + ), + ); + } + + Future _pickImage(ImageSource source) async { + if (_pendingAttachments.length >= _maxAttachments) { + if (mounted) { + ScaffoldMessenger.of(context).showSnackBar( + const SnackBar(content: Text('最多添加 $_maxAttachments 个附件')), + ); + } + return; + } + + final picker = ImagePicker(); + final picked = await picker.pickImage( + source: source, + maxWidth: 1024, + maxHeight: 1024, + imageQuality: 80, + ); + if (picked == null) return; + + final bytes = await picked.readAsBytes(); + final ext = picked.path.split('.').last.toLowerCase(); + final mediaType = switch (ext) { + 'png' => 'image/png', + 'webp' => 'image/webp', + 'gif' => 'image/gif', + _ => 'image/jpeg', + }; + + setState(() { + _pendingAttachments.add(ChatAttachment( + base64Data: base64Encode(bytes), + mediaType: mediaType, + fileName: picked.name, + )); + }); + } + + Future _pickMultipleImages() async { + final remaining = _maxAttachments - _pendingAttachments.length; + if (remaining <= 0) { + if (mounted) { + ScaffoldMessenger.of(context).showSnackBar( + const SnackBar(content: Text('最多添加 $_maxAttachments 个附件')), + ); + } + return; + } + + final picker = ImagePicker(); + final pickedList = await picker.pickMultiImage( + maxWidth: 1024, + maxHeight: 1024, + imageQuality: 80, + ); + if (pickedList.isEmpty) return; + + final toAdd = pickedList.take(remaining); + for (final picked in toAdd) { + final bytes = await picked.readAsBytes(); + final ext = picked.path.split('.').last.toLowerCase(); + final mediaType = switch (ext) { + 'png' => 'image/png', + 'webp' => 'image/webp', + 'gif' => 'image/gif', + _ => 'image/jpeg', + }; + _pendingAttachments.add(ChatAttachment( + base64Data: base64Encode(bytes), + mediaType: mediaType, + fileName: picked.name, + )); + } + setState(() {}); + + if (pickedList.length > remaining && mounted) { + ScaffoldMessenger.of(context).showSnackBar( + SnackBar(content: Text('已选择 $remaining 张,最多 $_maxAttachments 个')), + ); + } + } + + Future _pickFile() async { + final remaining = _maxAttachments - _pendingAttachments.length; + if (remaining <= 0) { + if (mounted) { + ScaffoldMessenger.of(context).showSnackBar( + const SnackBar(content: Text('最多添加 $_maxAttachments 个附件')), + ); + } + return; + } + + final result = await FilePicker.platform.pickFiles( + type: FileType.any, + allowMultiple: true, + ); + if (result == null || result.files.isEmpty) return; + + const allowedExts = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'pdf'}; + int skipped = 0; + + final toAdd = result.files.take(remaining); + for (final file in toAdd) { + if (file.path == null) continue; + final ext = (file.extension ?? '').toLowerCase(); + if (!allowedExts.contains(ext)) { + skipped++; + continue; + } + final bytes = await File(file.path!).readAsBytes(); + final String mediaType; + if (ext == 'pdf') { + mediaType = 'application/pdf'; + } else { + mediaType = switch (ext) { + 'png' => 'image/png', + 'webp' => 'image/webp', + 'gif' => 'image/gif', + _ => 'image/jpeg', + }; + } + + _pendingAttachments.add(ChatAttachment( + base64Data: base64Encode(bytes), + mediaType: mediaType, + fileName: file.name, + )); + } + setState(() {}); + + if (skipped > 0 && mounted) { + ScaffoldMessenger.of(context).showSnackBar( + const SnackBar(content: Text('仅支持图片(jpg/png/gif/webp)和PDF文件')), + ); + } else if (result.files.length > remaining && mounted) { + ScaffoldMessenger.of(context).showSnackBar( + SnackBar(content: Text('已选择 $remaining 个,最多 $_maxAttachments 个')), + ); + } + } + // --------------------------------------------------------------------------- // Build // --------------------------------------------------------------------------- @@ -345,9 +588,11 @@ class _AgentCallPageState extends ConsumerState ), ), if (_phase == _CallPhase.active) _buildWaveform(), - const Spacer(flex: 3), + Spacer(flex: _isInputExpanded ? 1 : 3), + if (_isInputExpanded && _phase == _CallPhase.active) + _buildTextInputArea(), _buildControls(), - const SizedBox(height: 48), + SizedBox(height: _isInputExpanded ? 12 : 48), ], ), ), @@ -464,6 +709,126 @@ class _AgentCallPageState extends ConsumerState ); } + Widget _buildTextInputArea() { + return Padding( + padding: const EdgeInsets.symmetric(horizontal: 24), + child: Column( + mainAxisSize: MainAxisSize.min, + children: [ + if (_pendingAttachments.isNotEmpty) + Padding( + padding: const EdgeInsets.only(bottom: 8), + child: _buildAttachmentPreview(), + ), + Container( + decoration: BoxDecoration( + color: AppColors.surface.withOpacity(0.92), + borderRadius: BorderRadius.circular(24), + border: Border.all(color: AppColors.surfaceLight.withOpacity(0.6)), + ), + child: Row( + children: [ + Padding( + padding: const EdgeInsets.only(left: 4), + child: IconButton( + icon: const Icon(Icons.add_circle_outline, size: 22), + onPressed: _showAttachmentOptions, + ), + ), + Expanded( + child: TextField( + controller: _textController, + decoration: const InputDecoration( + hintText: '输入文字或添加附件...', + hintStyle: TextStyle(color: AppColors.textMuted), + border: InputBorder.none, + contentPadding: EdgeInsets.symmetric(vertical: 12), + ), + textInputAction: TextInputAction.send, + onSubmitted: (_) => _sendTextMessage(), + ), + ), + Padding( + padding: const EdgeInsets.only(right: 4), + child: IconButton( + icon: _isSending + ? const SizedBox( + width: 20, height: 20, + child: CircularProgressIndicator(strokeWidth: 2), + ) + : const Icon(Icons.send, size: 20), + onPressed: _isSending ? null : _sendTextMessage, + ), + ), + ], + ), + ), + ], + ), + ); + } + + Widget _buildAttachmentPreview() { + return SizedBox( + height: 80, + child: ListView.builder( + scrollDirection: Axis.horizontal, + itemCount: _pendingAttachments.length, + itemBuilder: (ctx, i) { + final att = _pendingAttachments[i]; + final isImage = att.mediaType.startsWith('image/'); + return Stack( + children: [ + Padding( + padding: const EdgeInsets.all(4), + child: ClipRRect( + borderRadius: BorderRadius.circular(8), + child: isImage + ? Image.memory( + base64Decode(att.base64Data), + width: 72, height: 72, + fit: BoxFit.cover, + cacheWidth: 144, cacheHeight: 144, + ) + : Container( + width: 72, height: 72, + color: AppColors.surfaceLight, + child: Column( + mainAxisAlignment: MainAxisAlignment.center, + children: [ + const Icon(Icons.description, size: 28, color: AppColors.textSecondary), + const SizedBox(height: 2), + Text( + att.fileName?.split('.').last.toUpperCase() ?? 'FILE', + style: const TextStyle(fontSize: 10, color: AppColors.textMuted), + overflow: TextOverflow.ellipsis, + ), + ], + ), + ), + ), + ), + Positioned( + top: 0, + right: 0, + child: GestureDetector( + onTap: () => setState(() => _pendingAttachments.removeAt(i)), + child: Container( + decoration: const BoxDecoration( + color: Colors.black54, + shape: BoxShape.circle, + ), + child: const Icon(Icons.close, size: 16, color: Colors.white), + ), + ), + ), + ], + ); + }, + ), + ); + } + Widget _buildControls() { switch (_phase) { case _CallPhase.ringing: @@ -493,20 +858,27 @@ class _AgentCallPageState extends ConsumerState return Row( mainAxisAlignment: MainAxisAlignment.center, children: [ + _CircleButton( + icon: _isInputExpanded ? Icons.keyboard_hide : Icons.keyboard, + label: _isInputExpanded ? '收起' : '键盘', + isActive: _isInputExpanded, + onTap: () => setState(() => _isInputExpanded = !_isInputExpanded), + ), + const SizedBox(width: 16), _CircleButton( icon: _isMuted ? Icons.mic_off : Icons.mic, label: _isMuted ? '取消静音' : '静音', isActive: _isMuted, onTap: _toggleMute, ), - const SizedBox(width: 24), + const SizedBox(width: 16), FloatingActionButton( heroTag: 'end', backgroundColor: AppColors.error, onPressed: _endCall, child: const Icon(Icons.call_end, color: Colors.white), ), - const SizedBox(width: 24), + const SizedBox(width: 16), _CircleButton( icon: _isSpeaker ? Icons.volume_up : Icons.hearing, label: _isSpeaker ? '扬声器' : '听筒', @@ -525,6 +897,7 @@ class _AgentCallPageState extends ConsumerState @override void dispose() { _userEndedCall = true; + _textController.dispose(); _durationTimer?.cancel(); _waveTimer?.cancel(); _waveController.dispose(); diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py index 1d647d0..11f2dd0 100644 --- a/packages/services/voice-agent/src/agent.py +++ b/packages/services/voice-agent/src/agent.py @@ -290,6 +290,49 @@ async def entrypoint(ctx: JobContext) -> None: logger.info("Voice session started for room %s", ctx.room.name) + # --------------------------------------------------------------------- + # Data channel listener: receive text + attachments from Flutter client + # --------------------------------------------------------------------- + async def _on_data_received(data_packet) -> None: + try: + if data_packet.topic != "text_inject": + return + + payload = json.loads(data_packet.data.decode("utf-8")) + text = payload.get("text", "") + attachments = payload.get("attachments") + + logger.info( + "text_inject received: text=%s attachments=%d", + text[:80] if text else "(empty)", + len(attachments) if attachments else 0, + ) + + if not text and not attachments: + return + + # Call agent-service with the same session (context preserved) + response = await llm.inject_text_message( + text=text, + attachments=attachments, + ) + + if response: + logger.info("inject response: %s", response[:100]) + session.say(response) + else: + logger.warning("inject_text_message returned empty response") + + except Exception as exc: + logger.error( + "text_inject handler error: %s: %s", + type(exc).__name__, exc, exc_info=True, + ) + + # Use ensure_future because ctx.room.on() uses a sync event emitter + # (same pattern as the "disconnected" handler above) + ctx.room.on("data_received", lambda dp: asyncio.ensure_future(_on_data_received(dp))) + except Exception as exc: logger.error( "Voice session failed for room %s: %s: %s", diff --git a/packages/services/voice-agent/src/plugins/agent_llm.py b/packages/services/voice-agent/src/plugins/agent_llm.py index e6939fe..cc9eb32 100644 --- a/packages/services/voice-agent/src/plugins/agent_llm.py +++ b/packages/services/voice-agent/src/plugins/agent_llm.py @@ -48,6 +48,7 @@ class AgentServiceLLM(llm.LLM): self._auth_header = auth_header self._engine_type = engine_type self._agent_session_id: str | None = None + self._injecting = False # guard: don't clear session during inject @property def model(self) -> str: @@ -74,6 +75,176 @@ class AgentServiceLLM(llm.LLM): conn_options=conn_options, ) + async def inject_text_message( + self, + *, + text: str = "", + attachments: list[dict] | None = None, + ) -> str: + """Inject a text message (with optional attachments) into the agent session. + + Returns the complete response text for TTS playback via session.say(). + Uses the same session ID so conversation context is preserved. + """ + if not text and not attachments: + return "" + + self._injecting = True + try: + return await self._do_inject(text, attachments) + except Exception as exc: + logger.error("inject_text_message error: %s: %s", type(exc).__name__, exc) + return "" + finally: + self._injecting = False + + async def _do_inject( + self, + text: str, + attachments: list[dict] | None, + ) -> str: + """Execute inject: WS+HTTP stream, collect full response text.""" + import time + + agent_url = self._agent_service_url + auth_header = self._auth_header + + headers: dict[str, str] = {"Content-Type": "application/json"} + if auth_header: + headers["Authorization"] = auth_header + + ws_url = agent_url.replace("http://", "ws://").replace("https://", "wss://") + ws_url = f"{ws_url}/ws/agent" + + timeout_secs = 120 + engine_type = self._engine_type + voice_mode = engine_type == "claude_agent_sdk" + + body: dict[str, Any] = { + "prompt": text if text else "(see attachments)", + "engineType": engine_type, + "voiceMode": voice_mode, + } + + if voice_mode: + body["systemPrompt"] = ( + "你正在通过语音与用户实时对话。请严格遵守以下规则:\n" + "1. 只输出用户关注的最终答案,不要输出工具调用过程、中间步骤或技术细节\n" + "2. 用简洁自然的口语中文回答,像面对面对话一样\n" + "3. 回复要简短精炼,适合语音播报,通常1-3句话即可\n" + "4. 不要使用markdown格式、代码块、列表符号等文本格式" + ) + + if self._agent_session_id: + body["sessionId"] = self._agent_session_id + + if attachments: + body["attachments"] = attachments + + logger.info( + "inject POST /tasks engine=%s text=%s attachments=%d", + engine_type, + text[:80] if text else "(empty)", + len(attachments) if attachments else 0, + ) + + collected_text = "" + + async with websockets.connect( + ws_url, + open_timeout=10, + close_timeout=5, + ping_interval=20, + ping_timeout=10, + ) as ws: + # Pre-subscribe + if self._agent_session_id: + await ws.send(json.dumps({ + "event": "subscribe_session", + "data": {"sessionId": self._agent_session_id}, + })) + + # Create task + async with httpx.AsyncClient( + timeout=httpx.Timeout(connect=10, read=30, write=10, pool=10), + ) as client: + resp = await client.post( + f"{agent_url}/api/v1/agent/tasks", + json=body, + headers=headers, + ) + + if resp.status_code not in (200, 201): + logger.error( + "inject task creation failed: %d %s", + resp.status_code, resp.text[:200], + ) + return "" + + data = resp.json() + session_id = data.get("sessionId", "") + task_id = data.get("taskId", "") + self._agent_session_id = session_id + logger.info( + "inject task created: session=%s, task=%s", + session_id, task_id, + ) + + # Subscribe with actual IDs + await ws.send(json.dumps({ + "event": "subscribe_session", + "data": {"sessionId": session_id, "taskId": task_id}, + })) + + # Stream events → collect text + deadline = time.time() + timeout_secs + + while time.time() < deadline: + remaining = deadline - time.time() + try: + raw = await asyncio.wait_for( + ws.recv(), timeout=min(30.0, remaining) + ) + except asyncio.TimeoutError: + if time.time() >= deadline: + logger.warning("inject stream timeout after %ds", timeout_secs) + continue + except websockets.exceptions.ConnectionClosed: + logger.warning("inject WS connection closed") + break + + try: + msg = json.loads(raw) + except (json.JSONDecodeError, TypeError): + continue + + event_type = msg.get("event", "") + + if event_type == "stream_event": + evt_data = msg.get("data", {}) + evt_type = evt_data.get("type", "") + + if evt_type == "text": + content = evt_data.get("content", "") + if content: + collected_text += content + + elif evt_type == "completed": + logger.info( + "inject stream completed, text length=%d", + len(collected_text), + ) + return collected_text + + elif evt_type == "error": + err_msg = evt_data.get("message", "Unknown error") + logger.error("inject error: %s", err_msg) + if "aborted" in err_msg.lower() or "exited" in err_msg.lower(): + self._agent_session_id = None + return collected_text if collected_text else "" + + return collected_text + class AgentServiceLLMStream(llm.LLMStream): """Streams text from agent-service via WebSocket.""" @@ -336,9 +507,13 @@ class AgentServiceLLMStream(llm.LLMStream): logger.error("Agent error: %s", err_msg) # Clear session so next task starts fresh # (don't try to resume a dead/aborted session) + # But skip if inject is in progress — it owns the session if "aborted" in err_msg.lower() or "exited" in err_msg.lower(): - logger.info("Clearing agent session after abort/exit") - self._llm_instance._agent_session_id = None + if not self._llm_instance._injecting: + logger.info("Clearing agent session after abort/exit") + self._llm_instance._agent_session_id = None + else: + logger.info("Skipping session clear — inject in progress") self._event_ch.send_nowait( llm.ChatChunk( id=request_id,