From 5460be8c04e2f5f9c625cb3679043122487ab271 Mon Sep 17 00:00:00 2001
From: hailin
Date: Sun, 1 Mar 2026 09:38:15 -0800
Subject: [PATCH] feat: add TTS voice and style settings to Flutter app
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add user-configurable TTS voice and tone style settings that flow from
the Flutter app through the backend to the voice-agent at call time.

## Flutter App (it0_app)

### Domain Layer
- app_settings.dart: Add `ttsVoice` (default: 'coral') and `ttsStyle`
  (default: '') fields to AppSettings entity with copyWith support

### Data Layer
- settings_datasource.dart: Add SharedPreferences keys
  `settings_tts_voice` and `settings_tts_style` for local persistence
  in loadSettings(), saveSettings(), and clearSettings()

### Presentation Layer
- settings_providers.dart: Add `setTtsVoice()` and `setTtsStyle()`
  methods to SettingsNotifier for Riverpod state management
- settings_page.dart: Add "语音" settings group between Notifications
  and Security groups with:
  - Voice picker: 13 OpenAI voices with gender/style labels
    (e.g. "女 · 温暖", "男 · 沉稳", "中性") in a BottomSheet
  - Style picker: 5 presets (专业干练/温柔耐心/轻松活泼/严肃正式/科幻AI)
    as ChoiceChips + custom text input field + reset button

### Call Flow
- agent_call_page.dart: Send `tts_voice` and `tts_style` in the POST
  body when requesting a LiveKit token at call initiation

## Backend

### voice-service (Python/FastAPI)
- livekit_token.py: Accept optional `tts_voice` and `tts_style` via
  Pydantic TokenRequest body model; embed them in RoomAgentDispatch
  metadata JSON alongside auth_header (backward compatible)

### voice-agent (Python/LiveKit Agents)
- agent.py: Extract `tts_voice` and `tts_style` from ctx.job.metadata;
  use them when creating openai_plugin.TTS() — user-selected voice
  overrides config default, user-selected style overrides default
  instructions. Falls back to config defaults when not provided.

## Data Flow Flutter Settings → SharedPreferences → POST /livekit/token body → voice-service embeds in RoomAgentDispatch metadata → voice-agent reads from ctx.job.metadata → TTS creation Co-Authored-By: Claude Opus 4.6 --- .../plugins/GeneratedPluginRegistrant.java | 25 ++ .../presentation/pages/agent_call_page.dart | 12 +- .../data/datasources/settings_datasource.dart | 8 + .../domain/entities/app_settings.dart | 8 + .../presentation/pages/settings_page.dart | 241 ++++++++++++++++++ .../providers/settings_providers.dart | 10 + it0_app/pubspec.lock | 144 +++++++++++ packages/services/voice-agent/src/agent.py | 12 +- .../voice-service/src/api/livekit_token.py | 22 +- 9 files changed, 473 insertions(+), 9 deletions(-) diff --git a/it0_app/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java b/it0_app/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java index 7504b65..a754401 100644 --- a/it0_app/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java +++ b/it0_app/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java @@ -20,6 +20,21 @@ public final class GeneratedPluginRegistrant { } catch (Exception e) { Log.e(TAG, "Error registering plugin audio_session, com.ryanheise.audio_session.AudioSessionPlugin", e); } + try { + flutterEngine.getPlugins().add(new dev.fluttercommunity.plus.connectivity.ConnectivityPlugin()); + } catch (Exception e) { + Log.e(TAG, "Error registering plugin connectivity_plus, dev.fluttercommunity.plus.connectivity.ConnectivityPlugin", e); + } + try { + flutterEngine.getPlugins().add(new dev.fluttercommunity.plus.device_info.DeviceInfoPlusPlugin()); + } catch (Exception e) { + Log.e(TAG, "Error registering plugin device_info_plus, dev.fluttercommunity.plus.device_info.DeviceInfoPlusPlugin", e); + } + try { + flutterEngine.getPlugins().add(new com.mr.flutter.plugin.filepicker.FilePickerPlugin()); + } catch (Exception e) { + Log.e(TAG, "Error registering plugin 
file_picker, com.mr.flutter.plugin.filepicker.FilePickerPlugin", e); + } try { flutterEngine.getPlugins().add(new com.dexterous.flutterlocalnotifications.FlutterLocalNotificationsPlugin()); } catch (Exception e) { @@ -45,11 +60,21 @@ public final class GeneratedPluginRegistrant { } catch (Exception e) { Log.e(TAG, "Error registering plugin flutter_tts, com.eyedeadevelopment.fluttertts.FlutterTtsPlugin", e); } + try { + flutterEngine.getPlugins().add(new com.cloudwebrtc.webrtc.FlutterWebRTCPlugin()); + } catch (Exception e) { + Log.e(TAG, "Error registering plugin flutter_webrtc, com.cloudwebrtc.webrtc.FlutterWebRTCPlugin", e); + } try { flutterEngine.getPlugins().add(new io.flutter.plugins.imagepicker.ImagePickerPlugin()); } catch (Exception e) { Log.e(TAG, "Error registering plugin image_picker_android, io.flutter.plugins.imagepicker.ImagePickerPlugin", e); } + try { + flutterEngine.getPlugins().add(new io.livekit.plugin.LiveKitPlugin()); + } catch (Exception e) { + Log.e(TAG, "Error registering plugin livekit_client, io.livekit.plugin.LiveKitPlugin", e); + } try { flutterEngine.getPlugins().add(new dev.fluttercommunity.plus.packageinfo.PackageInfoPlugin()); } catch (Exception e) { diff --git a/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart b/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart index f242ffe..b5797d2 100644 --- a/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart +++ b/it0_app/lib/features/agent_call/presentation/pages/agent_call_page.dart @@ -7,6 +7,7 @@ import '../../../../core/config/api_endpoints.dart'; import '../../../../core/config/app_config.dart'; import '../../../../core/network/dio_client.dart'; import '../../../../core/theme/app_colors.dart'; +import '../../../settings/presentation/providers/settings_providers.dart'; /// Tracks the current state of the voice call. 
enum _CallPhase { ringing, connecting, active, ended } @@ -68,8 +69,15 @@ class _AgentCallPageState extends ConsumerState final dio = ref.read(dioClientProvider); final config = ref.read(appConfigProvider); - // 1. Get LiveKit token from backend - final response = await dio.post(ApiEndpoints.livekitToken); + // 1. Get LiveKit token from backend (with voice preferences) + final voiceSettings = ref.read(settingsProvider); + final response = await dio.post( + ApiEndpoints.livekitToken, + data: { + if (voiceSettings.ttsVoice.isNotEmpty) 'tts_voice': voiceSettings.ttsVoice, + if (voiceSettings.ttsStyle.isNotEmpty) 'tts_style': voiceSettings.ttsStyle, + }, + ); final data = response.data as Map; final token = data['token'] as String; final livekitUrl = data['livekit_url'] as String? ?? config.livekitUrl; diff --git a/it0_app/lib/features/settings/data/datasources/settings_datasource.dart b/it0_app/lib/features/settings/data/datasources/settings_datasource.dart index 6899a45..67fb35c 100644 --- a/it0_app/lib/features/settings/data/datasources/settings_datasource.dart +++ b/it0_app/lib/features/settings/data/datasources/settings_datasource.dart @@ -14,6 +14,8 @@ class SettingsDatasource { static const String _keyTenantName = 'settings_tenant_name'; static const String _keyLanguage = 'settings_language'; static const String _keyBiometric = 'settings_biometric'; + static const String _keyTtsVoice = 'settings_tts_voice'; + static const String _keyTtsStyle = 'settings_tts_style'; SettingsDatasource(this._prefs); @@ -31,6 +33,8 @@ class SettingsDatasource { selectedTenantName: _prefs.getString(_keyTenantName), language: _prefs.getString(_keyLanguage) ?? 'en', biometricEnabled: _prefs.getBool(_keyBiometric) ?? false, + ttsVoice: _prefs.getString(_keyTtsVoice) ?? 'coral', + ttsStyle: _prefs.getString(_keyTtsStyle) ?? 
'', ); } @@ -55,6 +59,8 @@ class SettingsDatasource { await _prefs.setString(_keyLanguage, settings.language); await _prefs.setBool(_keyBiometric, settings.biometricEnabled); + await _prefs.setString(_keyTtsVoice, settings.ttsVoice); + await _prefs.setString(_keyTtsStyle, settings.ttsStyle); } /// Removes all settings keys from SharedPreferences. @@ -67,5 +73,7 @@ class SettingsDatasource { await _prefs.remove(_keyTenantName); await _prefs.remove(_keyLanguage); await _prefs.remove(_keyBiometric); + await _prefs.remove(_keyTtsVoice); + await _prefs.remove(_keyTtsStyle); } } diff --git a/it0_app/lib/features/settings/domain/entities/app_settings.dart b/it0_app/lib/features/settings/domain/entities/app_settings.dart index eb91c0b..d39a2b5 100644 --- a/it0_app/lib/features/settings/domain/entities/app_settings.dart +++ b/it0_app/lib/features/settings/domain/entities/app_settings.dart @@ -10,6 +10,8 @@ class AppSettings { final String? selectedTenantName; final String language; final bool biometricEnabled; + final String ttsVoice; + final String ttsStyle; const AppSettings({ this.themeMode = ThemeMode.dark, @@ -20,6 +22,8 @@ class AppSettings { this.selectedTenantName, this.language = 'en', this.biometricEnabled = false, + this.ttsVoice = 'coral', + this.ttsStyle = '', }); AppSettings copyWith({ @@ -31,6 +35,8 @@ class AppSettings { String? selectedTenantName, String? language, bool? biometricEnabled, + String? ttsVoice, + String? ttsStyle, }) { return AppSettings( themeMode: themeMode ?? this.themeMode, @@ -41,6 +47,8 @@ class AppSettings { selectedTenantName: selectedTenantName ?? this.selectedTenantName, language: language ?? this.language, biometricEnabled: biometricEnabled ?? this.biometricEnabled, + ttsVoice: ttsVoice ?? this.ttsVoice, + ttsStyle: ttsStyle ?? 
this.ttsStyle, ); } } diff --git a/it0_app/lib/features/settings/presentation/pages/settings_page.dart b/it0_app/lib/features/settings/presentation/pages/settings_page.dart index ea655e4..a9c7051 100644 --- a/it0_app/lib/features/settings/presentation/pages/settings_page.dart +++ b/it0_app/lib/features/settings/presentation/pages/settings_page.dart @@ -110,6 +110,34 @@ class _SettingsPageState extends ConsumerState { ), const SizedBox(height: 24), + // ===== Voice Group ===== + _SettingsGroup( + cardColor: cardColor, + children: [ + _SettingsRow( + icon: Icons.record_voice_over, + iconBg: const Color(0xFF0EA5E9), + title: '语音音色', + trailing: Text( + _voiceDisplayLabel(settings.ttsVoice), + style: TextStyle(color: subtitleColor, fontSize: 14), + ), + onTap: () => _showVoicePicker(settings.ttsVoice), + ), + _SettingsRow( + icon: Icons.tune, + iconBg: const Color(0xFFF97316), + title: '语音风格', + trailing: Text( + _styleDisplayName(settings.ttsStyle), + style: TextStyle(color: subtitleColor, fontSize: 14), + ), + onTap: () => _showStylePicker(settings.ttsStyle), + ), + ], + ), + const SizedBox(height: 24), + // ===== Security Group ===== _SettingsGroup( cardColor: cardColor, @@ -330,6 +358,219 @@ class _SettingsPageState extends ConsumerState { ); } + // ---- Voice Picker ---------------------------------------------------------- + + static const _voices = [ + ('coral', 'Coral', '女 · 温暖'), + ('nova', 'Nova', '女 · 活泼'), + ('sage', 'Sage', '女 · 知性'), + ('shimmer', 'Shimmer', '女 · 柔和'), + ('marin', 'Marin', '女 · 清澈'), + ('ash', 'Ash', '男 · 沉稳'), + ('echo', 'Echo', '男 · 清朗'), + ('onyx', 'Onyx', '男 · 低沉'), + ('verse', 'Verse', '男 · 磁性'), + ('ballad', 'Ballad', '男 · 浑厚'), + ('cedar', 'Cedar', '男 · 自然'), + ('alloy', 'Alloy', '中性'), + ('fable', 'Fable', '中性 · 叙事'), + ]; + + void _showVoicePicker(String current) { + showModalBottomSheet( + context: context, + shape: const RoundedRectangleBorder( + borderRadius: BorderRadius.vertical(top: Radius.circular(20)), + ), + builder: 
(ctx) { + return SafeArea( + child: Column( + mainAxisSize: MainAxisSize.min, + children: [ + const SizedBox(height: 12), + Container( + width: 40, + height: 4, + decoration: BoxDecoration( + color: Colors.grey[400], + borderRadius: BorderRadius.circular(2), + ), + ), + const SizedBox(height: 16), + Text('选择语音音色', + style: Theme.of(ctx).textTheme.titleMedium?.copyWith( + fontWeight: FontWeight.w600, + )), + const SizedBox(height: 8), + Flexible( + child: ListView( + shrinkWrap: true, + children: _voices + .map((v) => ListTile( + leading: Icon( + Icons.record_voice_over, + color: current == v.$1 + ? Theme.of(ctx).colorScheme.primary + : null, + ), + title: Text( + v.$2, + style: TextStyle( + fontWeight: current == v.$1 + ? FontWeight.w600 + : FontWeight.normal, + color: current == v.$1 + ? Theme.of(ctx).colorScheme.primary + : null, + ), + ), + subtitle: Text(v.$3, + style: TextStyle( + fontSize: 12, + color: Theme.of(ctx).hintColor)), + trailing: current == v.$1 + ? Icon(Icons.check_circle, + color: + Theme.of(ctx).colorScheme.primary) + : null, + onTap: () { + ref + .read(settingsProvider.notifier) + .setTtsVoice(v.$1); + Navigator.pop(ctx); + }, + )) + .toList(), + ), + ), + const SizedBox(height: 16), + ], + ), + ); + }, + ); + } + + // ---- Style Picker --------------------------------------------------------- + + String _voiceDisplayLabel(String voice) { + for (final v in _voices) { + if (v.$1 == voice) return '${v.$2} · ${v.$3}'; + } + return voice[0].toUpperCase() + voice.substring(1); + } + + static const _stylePresets = [ + ('专业干练', '用专业、简洁、干练的语气说话,不拖泥带水。'), + ('温柔耐心', '用温柔、耐心的语气说话,像一个贴心的朋友。'), + ('轻松活泼', '用轻松、活泼的语气说话,带一点幽默感。'), + ('严肃正式', '用严肃、正式的语气说话,像在正式会议中发言。'), + ('科幻AI', '用科幻电影中AI的语气说话,冷静、理性、略带未来感。'), + ]; + + String _styleDisplayName(String style) { + if (style.isEmpty) return '默认'; + for (final p in _stylePresets) { + if (p.$2 == style) return p.$1; + } + return style.length > 6 ? '${style.substring(0, 6)}...' 
: style; + } + + void _showStylePicker(String current) { + final controller = TextEditingController( + text: _stylePresets.any((p) => p.$2 == current) ? '' : current, + ); + + showModalBottomSheet( + context: context, + isScrollControlled: true, + shape: const RoundedRectangleBorder( + borderRadius: BorderRadius.vertical(top: Radius.circular(20)), + ), + builder: (ctx) { + return Padding( + padding: EdgeInsets.fromLTRB( + 24, 24, 24, MediaQuery.of(ctx).viewInsets.bottom + 24), + child: Column( + mainAxisSize: MainAxisSize.min, + children: [ + Container( + width: 40, + height: 4, + decoration: BoxDecoration( + color: Colors.grey[400], + borderRadius: BorderRadius.circular(2), + ), + ), + const SizedBox(height: 16), + Text('选择语音风格', + style: Theme.of(ctx).textTheme.titleMedium?.copyWith( + fontWeight: FontWeight.w600, + )), + const SizedBox(height: 16), + Wrap( + spacing: 8, + runSpacing: 8, + children: _stylePresets + .map((p) => ChoiceChip( + label: Text(p.$1), + selected: current == p.$2, + onSelected: (_) { + ref + .read(settingsProvider.notifier) + .setTtsStyle(p.$2); + Navigator.pop(ctx); + }, + )) + .toList(), + ), + const SizedBox(height: 16), + TextField( + controller: controller, + decoration: InputDecoration( + labelText: '自定义风格', + hintText: '例如:用东北话说话,幽默风趣', + border: OutlineInputBorder( + borderRadius: BorderRadius.circular(12)), + ), + maxLines: 2, + ), + const SizedBox(height: 12), + Row( + children: [ + Expanded( + child: TextButton( + onPressed: () { + ref.read(settingsProvider.notifier).setTtsStyle(''); + Navigator.pop(ctx); + }, + child: const Text('恢复默认'), + ), + ), + const SizedBox(width: 12), + Expanded( + child: FilledButton( + onPressed: () { + final text = controller.text.trim(); + if (text.isNotEmpty) { + ref + .read(settingsProvider.notifier) + .setTtsStyle(text); + } + Navigator.pop(ctx); + }, + child: const Text('确认'), + ), + ), + ], + ), + ], + ), + ); + }, + ); + } + // ---- Edit Name Dialog 
----------------------------------------------------- void _showEditNameDialog(String currentName) { diff --git a/it0_app/lib/features/settings/presentation/providers/settings_providers.dart b/it0_app/lib/features/settings/presentation/providers/settings_providers.dart index 45e1bed..c738f17 100644 --- a/it0_app/lib/features/settings/presentation/providers/settings_providers.dart +++ b/it0_app/lib/features/settings/presentation/providers/settings_providers.dart @@ -124,6 +124,16 @@ class SettingsNotifier extends StateNotifier { await _repository?.saveSettings(state); } + Future setTtsVoice(String voice) async { + state = state.copyWith(ttsVoice: voice); + await _repository?.saveSettings(state); + } + + Future setTtsStyle(String style) async { + state = state.copyWith(ttsStyle: style); + await _repository?.saveSettings(state); + } + Future resetToDefaults() async { await _repository?.resetSettings(); state = const AppSettings(); diff --git a/it0_app/pubspec.lock b/it0_app/pubspec.lock index 48632e1..f7b22ae 100644 --- a/it0_app/pubspec.lock +++ b/it0_app/pubspec.lock @@ -9,6 +9,14 @@ packages: url: "https://pub.dev" source: hosted version: "85.0.0" + adaptive_number: + dependency: transitive + description: + name: adaptive_number + sha256: "3a567544e9b5c9c803006f51140ad544aedc79604fd4f3f2c1380003f97c1d77" + url: "https://pub.dev" + source: hosted + version: "1.0.0" analyzer: dependency: transitive description: @@ -177,6 +185,22 @@ packages: url: "https://pub.dev" source: hosted version: "1.19.1" + connectivity_plus: + dependency: transitive + description: + name: connectivity_plus + sha256: "33bae12a398f841c6cda09d1064212957265869104c478e5ad51e2fb26c3973c" + url: "https://pub.dev" + source: hosted + version: "7.0.0" + connectivity_plus_platform_interface: + dependency: transitive + description: + name: connectivity_plus_platform_interface + sha256: "42657c1715d48b167930d5f34d00222ac100475f73d10162ddf43e714932f204" + url: "https://pub.dev" + source: hosted + version: 
"2.0.1" convert: dependency: transitive description: @@ -225,6 +249,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.0.0+7.7.0" + dart_jsonwebtoken: + dependency: transitive + description: + name: dart_jsonwebtoken + sha256: c6ecb3bb991c459b91c5adf9e871113dcb32bbe8fe7ca2c92723f88ffc1e0b7a + url: "https://pub.dev" + source: hosted + version: "3.3.2" dart_style: dependency: transitive description: @@ -233,6 +265,14 @@ packages: url: "https://pub.dev" source: hosted version: "3.1.1" + dart_webrtc: + dependency: transitive + description: + name: dart_webrtc + sha256: "4ed7b9fa9924e5a81eb39271e2c2356739dd1039d60a13b86ba6c5f448625086" + url: "https://pub.dev" + source: hosted + version: "1.7.0" dbus: dependency: transitive description: @@ -241,6 +281,22 @@ packages: url: "https://pub.dev" source: hosted version: "0.7.12" + device_info_plus: + dependency: transitive + description: + name: device_info_plus + sha256: "4df8babf73058181227e18b08e6ea3520cf5fc5d796888d33b7cb0f33f984b7c" + url: "https://pub.dev" + source: hosted + version: "12.3.0" + device_info_plus_platform_interface: + dependency: transitive + description: + name: device_info_plus_platform_interface + sha256: e1ea89119e34903dca74b883d0dd78eb762814f97fb6c76f35e9ff74d261a18f + url: "https://pub.dev" + source: hosted + version: "7.0.3" dio: dependency: "direct main" description: @@ -257,6 +313,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.1.1" + ed25519_edwards: + dependency: transitive + description: + name: ed25519_edwards + sha256: "6ce0112d131327ec6d42beede1e5dfd526069b18ad45dcf654f15074ad9276cd" + url: "https://pub.dev" + source: hosted + version: "0.3.1" equatable: dependency: transitive description: @@ -289,6 +353,14 @@ packages: url: "https://pub.dev" source: hosted version: "7.0.1" + file_picker: + dependency: "direct main" + description: + name: file_picker + sha256: ab13ae8ef5580a411c458d6207b6774a6c237d77ac37011b13994879f68a8810 + url: "https://pub.dev" + source: 
hosted + version: "8.3.7" file_selector_linux: dependency: transitive description: @@ -504,6 +576,14 @@ packages: description: flutter source: sdk version: "0.0.0" + flutter_webrtc: + dependency: transitive + description: + name: flutter_webrtc + sha256: "0f86b518e9349e71a136a96e0ea11294cad8a8531b2bc9ae99e69df332ac898a" + url: "https://pub.dev" + source: hosted + version: "1.3.0" freezed: dependency: "direct dev" description: @@ -744,6 +824,14 @@ packages: url: "https://pub.dev" source: hosted version: "3.0.0" + livekit_client: + dependency: "direct main" + description: + name: livekit_client + sha256: "51d97a4501e385e53c140b8367a52316af5b466e71e6d800c8826065b6061521" + url: "https://pub.dev" + source: hosted + version: "2.6.4" logger: dependency: "direct main" description: @@ -792,6 +880,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.0.0" + mime_type: + dependency: transitive + description: + name: mime_type + sha256: d652b613e84dac1af28030a9fba82c0999be05b98163f9e18a0849c6e63838bb + url: "https://pub.dev" + source: hosted + version: "1.0.1" mockito: dependency: "direct dev" description: @@ -808,6 +904,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.0.0" + nm: + dependency: transitive + description: + name: nm + sha256: "2c9aae4127bdc8993206464fcc063611e0e36e72018696cd9631023a31b24254" + url: "https://pub.dev" + source: hosted + version: "0.5.0" package_config: dependency: transitive description: @@ -968,6 +1072,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.1.8" + pointycastle: + dependency: transitive + description: + name: pointycastle + sha256: "92aa3841d083cc4b0f4709b5c74fd6409a3e6ba833ffc7dc6a8fee096366acf5" + url: "https://pub.dev" + source: hosted + version: "4.0.0" pool: dependency: transitive description: @@ -984,6 +1096,14 @@ packages: url: "https://pub.dev" source: hosted version: "6.5.0" + protobuf: + dependency: transitive + description: + name: protobuf + sha256: 
"75ec242d22e950bdcc79ee38dd520ce4ee0bc491d7fadc4ea47694604d22bf06" + url: "https://pub.dev" + source: hosted + version: "6.0.0" provider: dependency: transitive description: @@ -1120,6 +1240,14 @@ packages: url: "https://pub.dev" source: hosted version: "0.28.0" + sdp_transform: + dependency: transitive + description: + name: sdp_transform + sha256: "73e412a5279a5c2de74001535208e20fff88f225c9a4571af0f7146202755e45" + url: "https://pub.dev" + source: hosted + version: "0.3.2" shared_preferences: dependency: "direct main" description: @@ -1517,6 +1645,14 @@ packages: url: "https://pub.dev" source: hosted version: "2.4.0" + webrtc_interface: + dependency: transitive + description: + name: webrtc_interface + sha256: ad0e5786b2acd3be72a3219ef1dde9e1cac071cf4604c685f11b61d63cdd6eb3 + url: "https://pub.dev" + source: hosted + version: "1.4.0" win32: dependency: transitive description: @@ -1525,6 +1661,14 @@ packages: url: "https://pub.dev" source: hosted version: "5.15.0" + win32_registry: + dependency: transitive + description: + name: win32_registry + sha256: "6f1b564492d0147b330dd794fee8f512cec4977957f310f9951b5f9d83618dae" + url: "https://pub.dev" + source: hosted + version: "2.1.0" xdg_directories: dependency: transitive description: diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py index 71e3757..85d2bf3 100644 --- a/packages/services/voice-agent/src/agent.py +++ b/packages/services/voice-agent/src/agent.py @@ -125,14 +125,19 @@ async def entrypoint(ctx: JobContext) -> None: # The token endpoint embeds {"auth_header": "Bearer ..."} via RoomAgentDispatch metadata, # which LiveKit passes through as job.metadata to the agent worker. 
auth_header = "" + tts_voice = settings.openai_tts_voice + tts_style = "" try: meta_str = ctx.job.metadata or "{}" meta = json.loads(meta_str) auth_header = meta.get("auth_header", "") + tts_voice = meta.get("tts_voice", settings.openai_tts_voice) + tts_style = meta.get("tts_style", "") except Exception as e: logger.warning("Failed to parse job metadata: %s", e) - logger.info("Auth header present: %s", bool(auth_header)) + logger.info("Auth header present: %s, TTS: voice=%s, style=%s", + bool(auth_header), tts_voice, tts_style[:50] if tts_style else "(default)") # Build STT if settings.stt_provider == "openai": @@ -165,10 +170,11 @@ async def entrypoint(ctx: JobContext) -> None: _http_client_tts = _httpx.AsyncClient(verify=False) _oai_client_tts = _openai.AsyncOpenAI(http_client=_http_client_tts) + default_instructions = "用自然、友好的中文语气说话,语速适中,像真人助手一样。" tts = openai_plugin.TTS( model=settings.openai_tts_model, - voice=settings.openai_tts_voice, - instructions="用自然、友好的中文语气说话,语速适中,像真人助手一样。", + voice=tts_voice, + instructions=tts_style if tts_style else default_instructions, client=_oai_client_tts, ) else: diff --git a/packages/services/voice-service/src/api/livekit_token.py b/packages/services/voice-service/src/api/livekit_token.py index 4951cbb..a1be55e 100644 --- a/packages/services/voice-service/src/api/livekit_token.py +++ b/packages/services/voice-service/src/api/livekit_token.py @@ -9,8 +9,10 @@ Generates a LiveKit room JWT for voice calls. 
The token includes: import json import uuid +from typing import Optional from fastapi import APIRouter, Request +from pydantic import BaseModel from livekit import api as livekit_api @@ -19,18 +21,30 @@ from ..config.settings import settings router = APIRouter() +class TokenRequest(BaseModel): + tts_voice: Optional[str] = None + tts_style: Optional[str] = None + + @router.post("/livekit/token") -async def create_livekit_token(request: Request): +async def create_livekit_token(request: Request, body: TokenRequest = TokenRequest()): """Generate a LiveKit room token for a voice call session. - The caller's Authorization header is embedded in the agent dispatch - metadata so that the voice-agent can forward it to agent-service. + The caller's Authorization header and optional TTS preferences are + embedded in the agent dispatch metadata so the voice-agent can use them. """ auth_header = request.headers.get("authorization", "") room_name = f"voice-{uuid.uuid4().hex[:12]}" participant_identity = f"user-{uuid.uuid4().hex[:8]}" + # Build metadata with auth + optional voice preferences + metadata: dict = {"auth_header": auth_header} + if body.tts_voice: + metadata["tts_voice"] = body.tts_voice + if body.tts_style: + metadata["tts_style"] = body.tts_style + token = ( livekit_api.AccessToken(settings.livekit_api_key, settings.livekit_api_secret) .with_identity(participant_identity) @@ -49,7 +63,7 @@ async def create_livekit_token(request: Request): agents=[ livekit_api.RoomAgentDispatch( agent_name="voice-agent", - metadata=json.dumps({"auth_header": auth_header}), + metadata=json.dumps(metadata), ) ], )