diff --git a/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart b/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart index 94d16cd..12a4b0d 100644 --- a/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart +++ b/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart @@ -80,6 +80,7 @@ class _AgentCallPageState extends ConsumerState data: { if (voiceSettings.ttsVoice.isNotEmpty) 'tts_voice': voiceSettings.ttsVoice, if (voiceSettings.ttsStyle.isNotEmpty) 'tts_style': voiceSettings.ttsStyle, + 'engine_type': voiceSettings.engineType, }, ); final data = response.data as Map; diff --git a/it0_app/lib/features/settings/data/datasources/settings_datasource.dart b/it0_app/lib/features/settings/data/datasources/settings_datasource.dart index 67fb35c..07715a1 100644 --- a/it0_app/lib/features/settings/data/datasources/settings_datasource.dart +++ b/it0_app/lib/features/settings/data/datasources/settings_datasource.dart @@ -16,6 +16,7 @@ class SettingsDatasource { static const String _keyBiometric = 'settings_biometric'; static const String _keyTtsVoice = 'settings_tts_voice'; static const String _keyTtsStyle = 'settings_tts_style'; + static const String _keyEngineType = 'settings_engine_type'; SettingsDatasource(this._prefs); @@ -35,6 +36,7 @@ class SettingsDatasource { biometricEnabled: _prefs.getBool(_keyBiometric) ?? false, ttsVoice: _prefs.getString(_keyTtsVoice) ?? 'coral', ttsStyle: _prefs.getString(_keyTtsStyle) ?? '', + engineType: _prefs.getString(_keyEngineType) ?? 'claude_agent_sdk', ); } @@ -61,6 +63,7 @@ class SettingsDatasource { await _prefs.setBool(_keyBiometric, settings.biometricEnabled); await _prefs.setString(_keyTtsVoice, settings.ttsVoice); await _prefs.setString(_keyTtsStyle, settings.ttsStyle); + await _prefs.setString(_keyEngineType, settings.engineType); } /// Removes all settings keys from SharedPreferences. @@ -75,5 +78,6 @@ class SettingsDatasource { await _prefs.remove(_keyBiometric); await _prefs.remove(_keyTtsVoice); await _prefs.remove(_keyTtsStyle); + await _prefs.remove(_keyEngineType); } } diff --git a/it0_app/lib/features/settings/domain/entities/app_settings.dart b/it0_app/lib/features/settings/domain/entities/app_settings.dart index d39a2b5..373d5ce 100644 --- a/it0_app/lib/features/settings/domain/entities/app_settings.dart +++ b/it0_app/lib/features/settings/domain/entities/app_settings.dart @@ -12,6 +12,7 @@ class AppSettings { final bool biometricEnabled; final String ttsVoice; final String ttsStyle; + final String engineType; const AppSettings({ this.themeMode = ThemeMode.dark, @@ -24,6 +25,7 @@ class AppSettings { this.biometricEnabled = false, this.ttsVoice = 'coral', this.ttsStyle = '', + this.engineType = 'claude_agent_sdk', }); AppSettings copyWith({ @@ -37,6 +39,7 @@ class AppSettings { bool? biometricEnabled, String? ttsVoice, String? ttsStyle, + String? engineType, }) { return AppSettings( themeMode: themeMode ?? this.themeMode, @@ -49,6 +52,7 @@ class AppSettings { biometricEnabled: biometricEnabled ?? this.biometricEnabled, ttsVoice: ttsVoice ?? this.ttsVoice, ttsStyle: ttsStyle ?? this.ttsStyle, + engineType: engineType ?? this.engineType, ); } } diff --git a/it0_app/lib/features/settings/presentation/pages/settings_page.dart b/it0_app/lib/features/settings/presentation/pages/settings_page.dart index a9c7051..b8f55ee 100644 --- a/it0_app/lib/features/settings/presentation/pages/settings_page.dart +++ b/it0_app/lib/features/settings/presentation/pages/settings_page.dart @@ -114,6 +114,16 @@ class _SettingsPageState extends ConsumerState { _SettingsGroup( cardColor: cardColor, children: [ + _SettingsRow( + icon: Icons.psychology, + iconBg: const Color(0xFF7C3AED), + title: '对话引擎', + trailing: Text( + settings.engineType == 'claude_agent_sdk' ? 'Agent SDK' : 'Claude API', + style: TextStyle(color: subtitleColor, fontSize: 14), + ), + onTap: () => _showEngineTypePicker(settings.engineType), + ), _SettingsRow( icon: Icons.record_voice_over, iconBg: const Color(0xFF0EA5E9), @@ -376,6 +386,60 @@ class _SettingsPageState extends ConsumerState { ('fable', 'Fable', '中性 · 叙事'), ]; + void _showEngineTypePicker(String current) { + final engines = [ + ('claude_agent_sdk', 'Agent SDK', '支持工具审批、技能注入、会话恢复'), + ('claude_api', 'Claude API', '直连 API,响应更快'), + ]; + showModalBottomSheet( + context: context, + shape: const RoundedRectangleBorder( + borderRadius: BorderRadius.vertical(top: Radius.circular(20)), + ), + builder: (ctx) => Padding( + padding: const EdgeInsets.symmetric(vertical: 16), + child: Column( + mainAxisSize: MainAxisSize.min, + children: [ + Container( + width: 36, + height: 4, + decoration: BoxDecoration( + color: Colors.grey[400], + borderRadius: BorderRadius.circular(2), + ), + ), + const SizedBox(height: 16), + Text('选择对话引擎', + style: Theme.of(ctx).textTheme.titleMedium?.copyWith( + fontWeight: FontWeight.w600, + )), + const SizedBox(height: 12), + ...engines.map((e) => ListTile( + leading: Icon( + e.$1 == 'claude_agent_sdk' ? Icons.psychology : Icons.api, + color: e.$1 == current ? AppColors.primary : null, + ), + title: Text(e.$2, + style: TextStyle( + fontWeight: e.$1 == current ? FontWeight.bold : FontWeight.normal, + color: e.$1 == current ? AppColors.primary : null, + )), + subtitle: Text(e.$3, style: const TextStyle(fontSize: 12)), + trailing: e.$1 == current + ? const Icon(Icons.check_circle, color: AppColors.primary) + : null, + onTap: () { + ref.read(settingsProvider.notifier).setEngineType(e.$1); + Navigator.pop(ctx); + }, + )), + ], + ), + ), + ); + } + void _showVoicePicker(String current) { showModalBottomSheet( context: context, diff --git a/it0_app/lib/features/settings/presentation/providers/settings_providers.dart b/it0_app/lib/features/settings/presentation/providers/settings_providers.dart index c738f17..24658b0 100644 --- a/it0_app/lib/features/settings/presentation/providers/settings_providers.dart +++ b/it0_app/lib/features/settings/presentation/providers/settings_providers.dart @@ -134,6 +134,11 @@ class SettingsNotifier extends StateNotifier { await _repository?.saveSettings(state); } + Future setEngineType(String type) async { + state = state.copyWith(engineType: type); + await _repository?.saveSettings(state); + } + Future resetToDefaults() async { await _repository?.resetSettings(); state = const AppSettings(); diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py index b671aa7..07892d3 100644 --- a/packages/services/voice-agent/src/agent.py +++ b/packages/services/voice-agent/src/agent.py @@ -200,17 +200,19 @@ async def entrypoint(ctx: JobContext) -> None: auth_header = "" tts_voice = settings.openai_tts_voice tts_style = "" + engine_type = "claude_agent_sdk" try: meta_str = ctx.job.metadata or "{}" meta = json.loads(meta_str) auth_header = meta.get("auth_header", "") tts_voice = meta.get("tts_voice", settings.openai_tts_voice) tts_style = meta.get("tts_style", "") + engine_type = meta.get("engine_type", "claude_agent_sdk") except Exception as e: logger.warning("Failed to parse job metadata: %s", e) - logger.info("Auth header present: %s, TTS: voice=%s, style=%s", - bool(auth_header), tts_voice, tts_style[:50] if tts_style else "(default)") + logger.info("Auth header present: %s, TTS: voice=%s, style=%s, engine=%s", + bool(auth_header), tts_voice, tts_style[:50] if tts_style else "(default)", engine_type) # Build STT if settings.stt_provider == "openai": @@ -270,6 +272,7 @@ async def entrypoint(ctx: JobContext) -> None: llm = AgentServiceLLM( agent_service_url=settings.agent_service_url, auth_header=auth_header, + engine_type=engine_type, ) # Create and start AgentSession with the full pipeline diff --git a/packages/services/voice-agent/src/plugins/agent_llm.py b/packages/services/voice-agent/src/plugins/agent_llm.py index 90c66af..82eab8f 100644 --- a/packages/services/voice-agent/src/plugins/agent_llm.py +++ b/packages/services/voice-agent/src/plugins/agent_llm.py @@ -2,10 +2,13 @@ Custom LLM plugin that proxies to IT0 agent-service. Instead of calling Claude directly, this plugin: -1. POSTs to agent-service /api/v1/agent/tasks (creates a task with engineType=claude_api) +1. POSTs to agent-service /api/v1/agent/tasks (engineType configurable: claude_agent_sdk or claude_api) 2. Subscribes to the agent-service WebSocket /ws/agent for streaming text events 3. Emits ChatChunk objects into the LiveKit pipeline +In Agent SDK mode, the prompt is wrapped with voice-conversation instructions +so the agent outputs concise spoken Chinese without tool-call details. + This preserves all agent-service capabilities: Tool Use, conversation history, tenant isolation, and session management. """ @@ -38,10 +41,12 @@ class AgentServiceLLM(llm.LLM): *, agent_service_url: str = "http://agent-service:3002", auth_header: str = "", + engine_type: str = "claude_agent_sdk", ): super().__init__() self._agent_service_url = agent_service_url self._auth_header = auth_header + self._engine_type = engine_type self._agent_session_id: str | None = None @property @@ -205,14 +210,29 @@ class AgentServiceLLMStream(llm.LLMStream): })) # 2. Create agent task (with timeout) + engine_type = self._llm_instance._engine_type + prompt = user_text + + # Agent SDK mode: instruct the agent to output concise spoken Chinese + # (skip tool-call details and intermediate steps) + if engine_type == "claude_agent_sdk": + prompt = ( + "【语音对话模式】你正在通过语音与用户实时对话。请严格遵守以下规则:\n" + "1. 只输出用户关注的最终答案,不要输出工具调用过程、中间步骤或技术细节\n" + "2. 用简洁自然的口语中文回答,像面对面对话一样\n" + "3. 回复要简短精炼,适合语音播报,通常1-3句话即可\n" + "4. 不要使用markdown格式、代码块、列表符号等文本格式\n" + f"\n用户说:{user_text}" + ) + body: dict[str, Any] = { - "prompt": user_text, - "engineType": "claude_api", + "prompt": prompt, + "engineType": engine_type, } if self._llm_instance._agent_session_id: body["sessionId"] = self._llm_instance._agent_session_id - logger.info("POST /tasks prompt=%s", user_text[:80]) + logger.info("POST /tasks engine=%s prompt=%s", engine_type, user_text[:80]) async with httpx.AsyncClient( timeout=httpx.Timeout(connect=10, read=30, write=10, pool=10), ) as client: diff --git a/packages/services/voice-service/src/api/livekit_token.py b/packages/services/voice-service/src/api/livekit_token.py index a1be55e..39bfd56 100644 --- a/packages/services/voice-service/src/api/livekit_token.py +++ b/packages/services/voice-service/src/api/livekit_token.py @@ -24,6 +24,7 @@ router = APIRouter() class TokenRequest(BaseModel): tts_voice: Optional[str] = None tts_style: Optional[str] = None + engine_type: Optional[str] = None @router.post("/livekit/token") @@ -44,6 +45,8 @@ async def create_livekit_token(request: Request, body: TokenRequest = TokenReque metadata["tts_voice"] = body.tts_voice if body.tts_style: metadata["tts_style"] = body.tts_style + if body.engine_type: + metadata["engine_type"] = body.engine_type token = ( livekit_api.AccessToken(settings.livekit_api_key, settings.livekit_api_secret)