feat: add TTS voice and style settings to Flutter app

Add user-configurable TTS voice and tone style settings that flow from
the Flutter app through the backend to the voice-agent at call time.

## Flutter App (it0_app)

### Domain Layer
- app_settings.dart: Add `ttsVoice` (default: 'coral') and `ttsStyle`
  (default: '') fields to AppSettings entity with copyWith support

### Data Layer
- settings_datasource.dart: Add SharedPreferences keys
  `settings_tts_voice` and `settings_tts_style`; they are read in
  loadSettings(), written in saveSettings(), and removed in clearSettings()

### Presentation Layer
- settings_providers.dart: Add `setTtsVoice()` and `setTtsStyle()`
  methods to SettingsNotifier for Riverpod state management
- settings_page.dart: Add "语音" settings group between Notifications
  and Security groups with:
  - Voice picker: 13 OpenAI voices with gender/style labels
    (e.g. "女 · 温暖", "男 · 沉稳", "中性") in a BottomSheet
  - Style picker: 5 presets (专业干练/温柔耐心/轻松活泼/严肃正式/科幻AI)
    as ChoiceChips + custom text input field + reset button

### Call Flow
- agent_call_page.dart: Send `tts_voice` and `tts_style` in the POST
  body when requesting a LiveKit token at call initiation

## Backend

### voice-service (Python/FastAPI)
- livekit_token.py: Accept optional `tts_voice` and `tts_style` via
  Pydantic TokenRequest body model; embed them in RoomAgentDispatch
  metadata JSON alongside auth_header (backward compatible)

### voice-agent (Python/LiveKit Agents)
- agent.py: Extract `tts_voice` and `tts_style` from ctx.job.metadata;
  use them when creating openai_plugin.TTS() — user-selected voice
  overrides config default, user-selected style overrides default
  instructions. Falls back to config defaults when not provided.

## Data Flow
Flutter Settings → SharedPreferences → POST /livekit/token body →
voice-service embeds in RoomAgentDispatch metadata →
voice-agent reads from ctx.job.metadata → TTS creation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-03-01 09:38:15 -08:00
parent 2dc361f7a0
commit 5460be8c04
9 changed files with 473 additions and 9 deletions

View File

@ -20,6 +20,21 @@ public final class GeneratedPluginRegistrant {
} catch (Exception e) { } catch (Exception e) {
Log.e(TAG, "Error registering plugin audio_session, com.ryanheise.audio_session.AudioSessionPlugin", e); Log.e(TAG, "Error registering plugin audio_session, com.ryanheise.audio_session.AudioSessionPlugin", e);
} }
try {
flutterEngine.getPlugins().add(new dev.fluttercommunity.plus.connectivity.ConnectivityPlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin connectivity_plus, dev.fluttercommunity.plus.connectivity.ConnectivityPlugin", e);
}
try {
flutterEngine.getPlugins().add(new dev.fluttercommunity.plus.device_info.DeviceInfoPlusPlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin device_info_plus, dev.fluttercommunity.plus.device_info.DeviceInfoPlusPlugin", e);
}
try {
flutterEngine.getPlugins().add(new com.mr.flutter.plugin.filepicker.FilePickerPlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin file_picker, com.mr.flutter.plugin.filepicker.FilePickerPlugin", e);
}
try { try {
flutterEngine.getPlugins().add(new com.dexterous.flutterlocalnotifications.FlutterLocalNotificationsPlugin()); flutterEngine.getPlugins().add(new com.dexterous.flutterlocalnotifications.FlutterLocalNotificationsPlugin());
} catch (Exception e) { } catch (Exception e) {
@ -45,11 +60,21 @@ public final class GeneratedPluginRegistrant {
} catch (Exception e) { } catch (Exception e) {
Log.e(TAG, "Error registering plugin flutter_tts, com.eyedeadevelopment.fluttertts.FlutterTtsPlugin", e); Log.e(TAG, "Error registering plugin flutter_tts, com.eyedeadevelopment.fluttertts.FlutterTtsPlugin", e);
} }
try {
flutterEngine.getPlugins().add(new com.cloudwebrtc.webrtc.FlutterWebRTCPlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin flutter_webrtc, com.cloudwebrtc.webrtc.FlutterWebRTCPlugin", e);
}
try { try {
flutterEngine.getPlugins().add(new io.flutter.plugins.imagepicker.ImagePickerPlugin()); flutterEngine.getPlugins().add(new io.flutter.plugins.imagepicker.ImagePickerPlugin());
} catch (Exception e) { } catch (Exception e) {
Log.e(TAG, "Error registering plugin image_picker_android, io.flutter.plugins.imagepicker.ImagePickerPlugin", e); Log.e(TAG, "Error registering plugin image_picker_android, io.flutter.plugins.imagepicker.ImagePickerPlugin", e);
} }
try {
flutterEngine.getPlugins().add(new io.livekit.plugin.LiveKitPlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin livekit_client, io.livekit.plugin.LiveKitPlugin", e);
}
try { try {
flutterEngine.getPlugins().add(new dev.fluttercommunity.plus.packageinfo.PackageInfoPlugin()); flutterEngine.getPlugins().add(new dev.fluttercommunity.plus.packageinfo.PackageInfoPlugin());
} catch (Exception e) { } catch (Exception e) {

View File

@ -7,6 +7,7 @@ import '../../../../core/config/api_endpoints.dart';
import '../../../../core/config/app_config.dart'; import '../../../../core/config/app_config.dart';
import '../../../../core/network/dio_client.dart'; import '../../../../core/network/dio_client.dart';
import '../../../../core/theme/app_colors.dart'; import '../../../../core/theme/app_colors.dart';
import '../../../settings/presentation/providers/settings_providers.dart';
/// Tracks the current state of the voice call. /// Tracks the current state of the voice call.
enum _CallPhase { ringing, connecting, active, ended } enum _CallPhase { ringing, connecting, active, ended }
@ -68,8 +69,15 @@ class _AgentCallPageState extends ConsumerState<AgentCallPage>
final dio = ref.read(dioClientProvider); final dio = ref.read(dioClientProvider);
final config = ref.read(appConfigProvider); final config = ref.read(appConfigProvider);
// 1. Get LiveKit token from backend // 1. Get LiveKit token from backend (with voice preferences)
final response = await dio.post(ApiEndpoints.livekitToken); final voiceSettings = ref.read(settingsProvider);
final response = await dio.post(
ApiEndpoints.livekitToken,
data: {
if (voiceSettings.ttsVoice.isNotEmpty) 'tts_voice': voiceSettings.ttsVoice,
if (voiceSettings.ttsStyle.isNotEmpty) 'tts_style': voiceSettings.ttsStyle,
},
);
final data = response.data as Map<String, dynamic>; final data = response.data as Map<String, dynamic>;
final token = data['token'] as String; final token = data['token'] as String;
final livekitUrl = data['livekit_url'] as String? ?? config.livekitUrl; final livekitUrl = data['livekit_url'] as String? ?? config.livekitUrl;

View File

@ -14,6 +14,8 @@ class SettingsDatasource {
static const String _keyTenantName = 'settings_tenant_name'; static const String _keyTenantName = 'settings_tenant_name';
static const String _keyLanguage = 'settings_language'; static const String _keyLanguage = 'settings_language';
static const String _keyBiometric = 'settings_biometric'; static const String _keyBiometric = 'settings_biometric';
static const String _keyTtsVoice = 'settings_tts_voice';
static const String _keyTtsStyle = 'settings_tts_style';
SettingsDatasource(this._prefs); SettingsDatasource(this._prefs);
@ -31,6 +33,8 @@ class SettingsDatasource {
selectedTenantName: _prefs.getString(_keyTenantName), selectedTenantName: _prefs.getString(_keyTenantName),
language: _prefs.getString(_keyLanguage) ?? 'en', language: _prefs.getString(_keyLanguage) ?? 'en',
biometricEnabled: _prefs.getBool(_keyBiometric) ?? false, biometricEnabled: _prefs.getBool(_keyBiometric) ?? false,
ttsVoice: _prefs.getString(_keyTtsVoice) ?? 'coral',
ttsStyle: _prefs.getString(_keyTtsStyle) ?? '',
); );
} }
@ -55,6 +59,8 @@ class SettingsDatasource {
await _prefs.setString(_keyLanguage, settings.language); await _prefs.setString(_keyLanguage, settings.language);
await _prefs.setBool(_keyBiometric, settings.biometricEnabled); await _prefs.setBool(_keyBiometric, settings.biometricEnabled);
await _prefs.setString(_keyTtsVoice, settings.ttsVoice);
await _prefs.setString(_keyTtsStyle, settings.ttsStyle);
} }
/// Removes all settings keys from SharedPreferences. /// Removes all settings keys from SharedPreferences.
@ -67,5 +73,7 @@ class SettingsDatasource {
await _prefs.remove(_keyTenantName); await _prefs.remove(_keyTenantName);
await _prefs.remove(_keyLanguage); await _prefs.remove(_keyLanguage);
await _prefs.remove(_keyBiometric); await _prefs.remove(_keyBiometric);
await _prefs.remove(_keyTtsVoice);
await _prefs.remove(_keyTtsStyle);
} }
} }

View File

@ -10,6 +10,8 @@ class AppSettings {
final String? selectedTenantName; final String? selectedTenantName;
final String language; final String language;
final bool biometricEnabled; final bool biometricEnabled;
final String ttsVoice;
final String ttsStyle;
const AppSettings({ const AppSettings({
this.themeMode = ThemeMode.dark, this.themeMode = ThemeMode.dark,
@ -20,6 +22,8 @@ class AppSettings {
this.selectedTenantName, this.selectedTenantName,
this.language = 'en', this.language = 'en',
this.biometricEnabled = false, this.biometricEnabled = false,
this.ttsVoice = 'coral',
this.ttsStyle = '',
}); });
AppSettings copyWith({ AppSettings copyWith({
@ -31,6 +35,8 @@ class AppSettings {
String? selectedTenantName, String? selectedTenantName,
String? language, String? language,
bool? biometricEnabled, bool? biometricEnabled,
String? ttsVoice,
String? ttsStyle,
}) { }) {
return AppSettings( return AppSettings(
themeMode: themeMode ?? this.themeMode, themeMode: themeMode ?? this.themeMode,
@ -41,6 +47,8 @@ class AppSettings {
selectedTenantName: selectedTenantName ?? this.selectedTenantName, selectedTenantName: selectedTenantName ?? this.selectedTenantName,
language: language ?? this.language, language: language ?? this.language,
biometricEnabled: biometricEnabled ?? this.biometricEnabled, biometricEnabled: biometricEnabled ?? this.biometricEnabled,
ttsVoice: ttsVoice ?? this.ttsVoice,
ttsStyle: ttsStyle ?? this.ttsStyle,
); );
} }
} }

View File

@ -110,6 +110,34 @@ class _SettingsPageState extends ConsumerState<SettingsPage> {
), ),
const SizedBox(height: 24), const SizedBox(height: 24),
// ===== Voice Group =====
_SettingsGroup(
cardColor: cardColor,
children: [
_SettingsRow(
icon: Icons.record_voice_over,
iconBg: const Color(0xFF0EA5E9),
title: '语音音色',
trailing: Text(
_voiceDisplayLabel(settings.ttsVoice),
style: TextStyle(color: subtitleColor, fontSize: 14),
),
onTap: () => _showVoicePicker(settings.ttsVoice),
),
_SettingsRow(
icon: Icons.tune,
iconBg: const Color(0xFFF97316),
title: '语音风格',
trailing: Text(
_styleDisplayName(settings.ttsStyle),
style: TextStyle(color: subtitleColor, fontSize: 14),
),
onTap: () => _showStylePicker(settings.ttsStyle),
),
],
),
const SizedBox(height: 24),
// ===== Security Group ===== // ===== Security Group =====
_SettingsGroup( _SettingsGroup(
cardColor: cardColor, cardColor: cardColor,
@ -330,6 +358,219 @@ class _SettingsPageState extends ConsumerState<SettingsPage> {
); );
} }
// ---- Voice Picker ----------------------------------------------------------
/// Voices offered by the voice picker, as `(id, displayName, label)` records.
///
/// `$1` is the value passed to `setTtsVoice` and compared against the current
/// setting; `$2` is shown as the tile title and `$3` as the tile subtitle.
/// `_voiceDisplayLabel` joins `$2` and `$3` for the settings row.
static const _voices = [
('coral', 'Coral', '女 · 温暖'),
('nova', 'Nova', '女 · 活泼'),
('sage', 'Sage', '女 · 知性'),
('shimmer', 'Shimmer', '女 · 柔和'),
('marin', 'Marin', '女 · 清澈'),
('ash', 'Ash', '男 · 沉稳'),
('echo', 'Echo', '男 · 清朗'),
('onyx', 'Onyx', '男 · 低沉'),
('verse', 'Verse', '男 · 磁性'),
('ballad', 'Ballad', '男 · 浑厚'),
('cedar', 'Cedar', '男 · 自然'),
('alloy', 'Alloy', '中性'),
('fable', 'Fable', '中性 · 叙事'),
];
/// Shows a modal bottom sheet for choosing the TTS voice.
///
/// One tile is built per entry of [_voices]. The tile whose id equals
/// [current] is drawn in the theme's primary colour with a check icon.
/// Tapping a tile passes its id to `setTtsVoice` on the settings notifier and
/// then closes the sheet.
void _showVoicePicker(String current) {
  showModalBottomSheet(
    context: context,
    shape: const RoundedRectangleBorder(
      borderRadius: BorderRadius.vertical(top: Radius.circular(20)),
    ),
    builder: (sheetContext) {
      final theme = Theme.of(sheetContext);
      final accent = theme.colorScheme.primary;

      // Builds the row for a single `(id, name, label)` voice record.
      Widget voiceTile((String, String, String) voice) {
        final (id, name, label) = voice;
        final isCurrent = current == id;
        return ListTile(
          leading: Icon(
            Icons.record_voice_over,
            color: isCurrent ? accent : null,
          ),
          title: Text(
            name,
            style: TextStyle(
              fontWeight: isCurrent ? FontWeight.w600 : FontWeight.normal,
              color: isCurrent ? accent : null,
            ),
          ),
          subtitle: Text(
            label,
            style: TextStyle(fontSize: 12, color: theme.hintColor),
          ),
          trailing: isCurrent ? Icon(Icons.check_circle, color: accent) : null,
          onTap: () {
            ref.read(settingsProvider.notifier).setTtsVoice(id);
            Navigator.pop(sheetContext);
          },
        );
      }

      // Small grey bar drawn at the top of the sheet.
      final handle = Container(
        width: 40,
        height: 4,
        decoration: BoxDecoration(
          color: Colors.grey[400],
          borderRadius: BorderRadius.circular(2),
        ),
      );

      return SafeArea(
        child: Column(
          mainAxisSize: MainAxisSize.min,
          children: [
            const SizedBox(height: 12),
            handle,
            const SizedBox(height: 16),
            Text(
              '选择语音音色',
              style: theme.textTheme.titleMedium?.copyWith(
                fontWeight: FontWeight.w600,
              ),
            ),
            const SizedBox(height: 8),
            // Flexible + shrinkWrap: the list takes only the height it needs
            // and scrolls once the sheet's height limit is reached.
            Flexible(
              child: ListView(
                shrinkWrap: true,
                children: [for (final voice in _voices) voiceTile(voice)],
              ),
            ),
            const SizedBox(height: 16),
          ],
        ),
      );
    },
  );
}
// ---- Style Picker ---------------------------------------------------------
/// Returns the text shown on the voice settings row for [voice].
///
/// A voice id found in [_voices] is rendered as `"<name> · <label>"`. An
/// unknown id falls back to the id itself with its first character
/// upper-cased. An empty id yields '默认', the same label `_styleDisplayName`
/// uses for an empty style.
String _voiceDisplayLabel(String voice) {
  // Guard first: `voice[0]` below throws a RangeError on an empty string.
  if (voice.isEmpty) return '默认';
  for (final v in _voices) {
    if (v.$1 == voice) return '${v.$2} · ${v.$3}';
  }
  return voice[0].toUpperCase() + voice.substring(1);
}
/// Preset speaking styles, as `(displayName, instruction)` records.
///
/// `$1` is the chip label and the name shown on the settings row; `$2` is the
/// instruction text passed to `setTtsStyle`. A stored style is recognised as a
/// preset by exact string equality with `$2`.
///
/// NOTE(review): the '科幻AI' instruction has no separator between "说话" and
/// "冷静", unlike the other presets — presumably a missing ','. Editing the
/// text would stop previously saved values from matching this preset; confirm
/// before changing it.
static const _stylePresets = [
('专业干练', '用专业、简洁、干练的语气说话,不拖泥带水。'),
('温柔耐心', '用温柔、耐心的语气说话,像一个贴心的朋友。'),
('轻松活泼', '用轻松、活泼的语气说话,带一点幽默感。'),
('严肃正式', '用严肃、正式的语气说话,像在正式会议中发言。'),
('科幻AI', '用科幻电影中AI的语气说话冷静、理性、略带未来感。'),
];
/// Returns the short label shown on the style settings row for [style].
///
/// - An empty style is shown as '默认'.
/// - A style equal to a preset's instruction text is shown as that preset's
///   display name.
/// - Any other (custom) style is shown as-is when it has at most six code
///   points, otherwise as its first six code points followed by '...'.
String _styleDisplayName(String style) {
  if (style.isEmpty) return '默认';
  for (final p in _stylePresets) {
    if (p.$2 == style) return p.$1;
  }
  // Truncate by Unicode code point rather than UTF-16 code unit, so a
  // character outside the BMP (e.g. an emoji in a custom style) is never cut
  // in half, which would render as a broken glyph.
  // NOTE(review): multi-code-point grapheme clusters can still be split; that
  // would need the `characters` package.
  final codePoints = style.runes;
  if (codePoints.length <= 6) return style;
  return '${String.fromCharCodes(codePoints.take(6))}...';
}
/// Shows a modal bottom sheet for choosing the TTS speaking style.
///
/// The sheet offers one chip per entry of [_stylePresets], a free-text field
/// for a custom style, a reset button that stores an empty style, and a
/// confirm button that stores the trimmed custom text when it is non-empty.
/// Every action closes the sheet. The text field starts empty when [current]
/// is one of the presets and is pre-filled with [current] otherwise.
///
/// NOTE(review): the TextEditingController created here is never disposed.
/// Disposing it as soon as the sheet's future completes may race with the
/// closing animation; moving the sheet body into a StatefulWidget that owns
/// the controller looks like the safer fix — confirm.
void _showStylePicker(String current) {
  final isPreset = _stylePresets.any((preset) => preset.$2 == current);
  final customController = TextEditingController(text: isPreset ? '' : current);

  showModalBottomSheet(
    context: context,
    isScrollControlled: true,
    shape: const RoundedRectangleBorder(
      borderRadius: BorderRadius.vertical(top: Radius.circular(20)),
    ),
    builder: (sheetContext) {
      // Stores [style] through the settings notifier, then closes the sheet.
      void applyAndClose(String style) {
        ref.read(settingsProvider.notifier).setTtsStyle(style);
        Navigator.pop(sheetContext);
      }

      // Bottom padding grows by the view inset reported by MediaQuery.
      final bottomInset = MediaQuery.of(sheetContext).viewInsets.bottom;

      // Small grey bar drawn at the top of the sheet.
      final handle = Container(
        width: 40,
        height: 4,
        decoration: BoxDecoration(
          color: Colors.grey[400],
          borderRadius: BorderRadius.circular(2),
        ),
      );

      final presetChips = Wrap(
        spacing: 8,
        runSpacing: 8,
        children: [
          for (final preset in _stylePresets)
            ChoiceChip(
              label: Text(preset.$1),
              selected: current == preset.$2,
              onSelected: (_) => applyAndClose(preset.$2),
            ),
        ],
      );

      final customField = TextField(
        controller: customController,
        decoration: InputDecoration(
          labelText: '自定义风格',
          hintText: '例如:用东北话说话,幽默风趣',
          border: OutlineInputBorder(
            borderRadius: BorderRadius.circular(12),
          ),
        ),
        maxLines: 2,
      );

      final actions = Row(
        children: [
          Expanded(
            child: TextButton(
              // Reset: an empty style is displayed as the default.
              onPressed: () => applyAndClose(''),
              child: const Text('恢复默认'),
            ),
          ),
          const SizedBox(width: 12),
          Expanded(
            child: FilledButton(
              onPressed: () {
                final custom = customController.text.trim();
                if (custom.isEmpty) {
                  // Nothing typed: close without touching the stored style.
                  Navigator.pop(sheetContext);
                  return;
                }
                applyAndClose(custom);
              },
              child: const Text('确认'),
            ),
          ),
        ],
      );

      return Padding(
        padding: EdgeInsets.fromLTRB(24, 24, 24, bottomInset + 24),
        child: Column(
          mainAxisSize: MainAxisSize.min,
          children: [
            handle,
            const SizedBox(height: 16),
            Text(
              '选择语音风格',
              style: Theme.of(sheetContext).textTheme.titleMedium?.copyWith(
                    fontWeight: FontWeight.w600,
                  ),
            ),
            const SizedBox(height: 16),
            presetChips,
            const SizedBox(height: 16),
            customField,
            const SizedBox(height: 12),
            actions,
          ],
        ),
      );
    },
  );
}
// ---- Edit Name Dialog ----------------------------------------------------- // ---- Edit Name Dialog -----------------------------------------------------
void _showEditNameDialog(String currentName) { void _showEditNameDialog(String currentName) {

View File

@ -124,6 +124,16 @@ class SettingsNotifier extends StateNotifier<AppSettings> {
await _repository?.saveSettings(state); await _repository?.saveSettings(state);
} }
/// Replaces the TTS voice in the current settings with [voice], then saves
/// the resulting state through the repository when one is available.
Future<void> setTtsVoice(String voice) async {
  final next = state.copyWith(ttsVoice: voice);
  state = next;
  // Persist whatever the notifier currently holds.
  await _repository?.saveSettings(state);
}
/// Replaces the TTS style in the current settings with [style], then saves
/// the resulting state through the repository when one is available.
Future<void> setTtsStyle(String style) async {
  final next = state.copyWith(ttsStyle: style);
  state = next;
  // Persist whatever the notifier currently holds.
  await _repository?.saveSettings(state);
}
Future<void> resetToDefaults() async { Future<void> resetToDefaults() async {
await _repository?.resetSettings(); await _repository?.resetSettings();
state = const AppSettings(); state = const AppSettings();

View File

@ -9,6 +9,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "85.0.0" version: "85.0.0"
adaptive_number:
dependency: transitive
description:
name: adaptive_number
sha256: "3a567544e9b5c9c803006f51140ad544aedc79604fd4f3f2c1380003f97c1d77"
url: "https://pub.dev"
source: hosted
version: "1.0.0"
analyzer: analyzer:
dependency: transitive dependency: transitive
description: description:
@ -177,6 +185,22 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "1.19.1" version: "1.19.1"
connectivity_plus:
dependency: transitive
description:
name: connectivity_plus
sha256: "33bae12a398f841c6cda09d1064212957265869104c478e5ad51e2fb26c3973c"
url: "https://pub.dev"
source: hosted
version: "7.0.0"
connectivity_plus_platform_interface:
dependency: transitive
description:
name: connectivity_plus_platform_interface
sha256: "42657c1715d48b167930d5f34d00222ac100475f73d10162ddf43e714932f204"
url: "https://pub.dev"
source: hosted
version: "2.0.1"
convert: convert:
dependency: transitive dependency: transitive
description: description:
@ -225,6 +249,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "1.0.0+7.7.0" version: "1.0.0+7.7.0"
dart_jsonwebtoken:
dependency: transitive
description:
name: dart_jsonwebtoken
sha256: c6ecb3bb991c459b91c5adf9e871113dcb32bbe8fe7ca2c92723f88ffc1e0b7a
url: "https://pub.dev"
source: hosted
version: "3.3.2"
dart_style: dart_style:
dependency: transitive dependency: transitive
description: description:
@ -233,6 +265,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "3.1.1" version: "3.1.1"
dart_webrtc:
dependency: transitive
description:
name: dart_webrtc
sha256: "4ed7b9fa9924e5a81eb39271e2c2356739dd1039d60a13b86ba6c5f448625086"
url: "https://pub.dev"
source: hosted
version: "1.7.0"
dbus: dbus:
dependency: transitive dependency: transitive
description: description:
@ -241,6 +281,22 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "0.7.12" version: "0.7.12"
device_info_plus:
dependency: transitive
description:
name: device_info_plus
sha256: "4df8babf73058181227e18b08e6ea3520cf5fc5d796888d33b7cb0f33f984b7c"
url: "https://pub.dev"
source: hosted
version: "12.3.0"
device_info_plus_platform_interface:
dependency: transitive
description:
name: device_info_plus_platform_interface
sha256: e1ea89119e34903dca74b883d0dd78eb762814f97fb6c76f35e9ff74d261a18f
url: "https://pub.dev"
source: hosted
version: "7.0.3"
dio: dio:
dependency: "direct main" dependency: "direct main"
description: description:
@ -257,6 +313,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "2.1.1" version: "2.1.1"
ed25519_edwards:
dependency: transitive
description:
name: ed25519_edwards
sha256: "6ce0112d131327ec6d42beede1e5dfd526069b18ad45dcf654f15074ad9276cd"
url: "https://pub.dev"
source: hosted
version: "0.3.1"
equatable: equatable:
dependency: transitive dependency: transitive
description: description:
@ -289,6 +353,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "7.0.1" version: "7.0.1"
file_picker:
dependency: "direct main"
description:
name: file_picker
sha256: ab13ae8ef5580a411c458d6207b6774a6c237d77ac37011b13994879f68a8810
url: "https://pub.dev"
source: hosted
version: "8.3.7"
file_selector_linux: file_selector_linux:
dependency: transitive dependency: transitive
description: description:
@ -504,6 +576,14 @@ packages:
description: flutter description: flutter
source: sdk source: sdk
version: "0.0.0" version: "0.0.0"
flutter_webrtc:
dependency: transitive
description:
name: flutter_webrtc
sha256: "0f86b518e9349e71a136a96e0ea11294cad8a8531b2bc9ae99e69df332ac898a"
url: "https://pub.dev"
source: hosted
version: "1.3.0"
freezed: freezed:
dependency: "direct dev" dependency: "direct dev"
description: description:
@ -744,6 +824,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "3.0.0" version: "3.0.0"
livekit_client:
dependency: "direct main"
description:
name: livekit_client
sha256: "51d97a4501e385e53c140b8367a52316af5b466e71e6d800c8826065b6061521"
url: "https://pub.dev"
source: hosted
version: "2.6.4"
logger: logger:
dependency: "direct main" dependency: "direct main"
description: description:
@ -792,6 +880,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "2.0.0" version: "2.0.0"
mime_type:
dependency: transitive
description:
name: mime_type
sha256: d652b613e84dac1af28030a9fba82c0999be05b98163f9e18a0849c6e63838bb
url: "https://pub.dev"
source: hosted
version: "1.0.1"
mockito: mockito:
dependency: "direct dev" dependency: "direct dev"
description: description:
@ -808,6 +904,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "1.0.0" version: "1.0.0"
nm:
dependency: transitive
description:
name: nm
sha256: "2c9aae4127bdc8993206464fcc063611e0e36e72018696cd9631023a31b24254"
url: "https://pub.dev"
source: hosted
version: "0.5.0"
package_config: package_config:
dependency: transitive dependency: transitive
description: description:
@ -968,6 +1072,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "2.1.8" version: "2.1.8"
pointycastle:
dependency: transitive
description:
name: pointycastle
sha256: "92aa3841d083cc4b0f4709b5c74fd6409a3e6ba833ffc7dc6a8fee096366acf5"
url: "https://pub.dev"
source: hosted
version: "4.0.0"
pool: pool:
dependency: transitive dependency: transitive
description: description:
@ -984,6 +1096,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "6.5.0" version: "6.5.0"
protobuf:
dependency: transitive
description:
name: protobuf
sha256: "75ec242d22e950bdcc79ee38dd520ce4ee0bc491d7fadc4ea47694604d22bf06"
url: "https://pub.dev"
source: hosted
version: "6.0.0"
provider: provider:
dependency: transitive dependency: transitive
description: description:
@ -1120,6 +1240,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "0.28.0" version: "0.28.0"
sdp_transform:
dependency: transitive
description:
name: sdp_transform
sha256: "73e412a5279a5c2de74001535208e20fff88f225c9a4571af0f7146202755e45"
url: "https://pub.dev"
source: hosted
version: "0.3.2"
shared_preferences: shared_preferences:
dependency: "direct main" dependency: "direct main"
description: description:
@ -1517,6 +1645,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "2.4.0" version: "2.4.0"
webrtc_interface:
dependency: transitive
description:
name: webrtc_interface
sha256: ad0e5786b2acd3be72a3219ef1dde9e1cac071cf4604c685f11b61d63cdd6eb3
url: "https://pub.dev"
source: hosted
version: "1.4.0"
win32: win32:
dependency: transitive dependency: transitive
description: description:
@ -1525,6 +1661,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "5.15.0" version: "5.15.0"
win32_registry:
dependency: transitive
description:
name: win32_registry
sha256: "6f1b564492d0147b330dd794fee8f512cec4977957f310f9951b5f9d83618dae"
url: "https://pub.dev"
source: hosted
version: "2.1.0"
xdg_directories: xdg_directories:
dependency: transitive dependency: transitive
description: description:

View File

@ -125,14 +125,19 @@ async def entrypoint(ctx: JobContext) -> None:
# The token endpoint embeds {"auth_header": "Bearer ..."} via RoomAgentDispatch metadata, # The token endpoint embeds {"auth_header": "Bearer ..."} via RoomAgentDispatch metadata,
# which LiveKit passes through as job.metadata to the agent worker. # which LiveKit passes through as job.metadata to the agent worker.
auth_header = "" auth_header = ""
tts_voice = settings.openai_tts_voice
tts_style = ""
try: try:
meta_str = ctx.job.metadata or "{}" meta_str = ctx.job.metadata or "{}"
meta = json.loads(meta_str) meta = json.loads(meta_str)
auth_header = meta.get("auth_header", "") auth_header = meta.get("auth_header", "")
tts_voice = meta.get("tts_voice", settings.openai_tts_voice)
tts_style = meta.get("tts_style", "")
except Exception as e: except Exception as e:
logger.warning("Failed to parse job metadata: %s", e) logger.warning("Failed to parse job metadata: %s", e)
logger.info("Auth header present: %s", bool(auth_header)) logger.info("Auth header present: %s, TTS: voice=%s, style=%s",
bool(auth_header), tts_voice, tts_style[:50] if tts_style else "(default)")
# Build STT # Build STT
if settings.stt_provider == "openai": if settings.stt_provider == "openai":
@ -165,10 +170,11 @@ async def entrypoint(ctx: JobContext) -> None:
_http_client_tts = _httpx.AsyncClient(verify=False) _http_client_tts = _httpx.AsyncClient(verify=False)
_oai_client_tts = _openai.AsyncOpenAI(http_client=_http_client_tts) _oai_client_tts = _openai.AsyncOpenAI(http_client=_http_client_tts)
default_instructions = "用自然、友好的中文语气说话,语速适中,像真人助手一样。"
tts = openai_plugin.TTS( tts = openai_plugin.TTS(
model=settings.openai_tts_model, model=settings.openai_tts_model,
voice=settings.openai_tts_voice, voice=tts_voice,
instructions="用自然、友好的中文语气说话,语速适中,像真人助手一样。", instructions=tts_style if tts_style else default_instructions,
client=_oai_client_tts, client=_oai_client_tts,
) )
else: else:

View File

@ -9,8 +9,10 @@ Generates a LiveKit room JWT for voice calls. The token includes:
import json import json
import uuid import uuid
from typing import Optional
from fastapi import APIRouter, Request from fastapi import APIRouter, Request
from pydantic import BaseModel
from livekit import api as livekit_api from livekit import api as livekit_api
@ -19,18 +21,30 @@ from ..config.settings import settings
router = APIRouter() router = APIRouter()
# Request body model for POST /livekit/token carrying the caller's optional
# TTS preferences. Both fields default to None.
# NOTE(review): these values come from the client; create_livekit_token copies
# them into the agent dispatch metadata when truthy, and no length or content
# limit is visible in this diff — consider bounding them.
class TokenRequest(BaseModel):
# Voice id requested by the client.
tts_voice: Optional[str] = None
# Free-text speaking-style instruction requested by the client.
tts_style: Optional[str] = None
@router.post("/livekit/token") @router.post("/livekit/token")
async def create_livekit_token(request: Request): async def create_livekit_token(request: Request, body: TokenRequest = TokenRequest()):
"""Generate a LiveKit room token for a voice call session. """Generate a LiveKit room token for a voice call session.
The caller's Authorization header is embedded in the agent dispatch The caller's Authorization header and optional TTS preferences are
metadata so that the voice-agent can forward it to agent-service. embedded in the agent dispatch metadata so the voice-agent can use them.
""" """
auth_header = request.headers.get("authorization", "") auth_header = request.headers.get("authorization", "")
room_name = f"voice-{uuid.uuid4().hex[:12]}" room_name = f"voice-{uuid.uuid4().hex[:12]}"
participant_identity = f"user-{uuid.uuid4().hex[:8]}" participant_identity = f"user-{uuid.uuid4().hex[:8]}"
# Build metadata with auth + optional voice preferences
metadata: dict = {"auth_header": auth_header}
if body.tts_voice:
metadata["tts_voice"] = body.tts_voice
if body.tts_style:
metadata["tts_style"] = body.tts_style
token = ( token = (
livekit_api.AccessToken(settings.livekit_api_key, settings.livekit_api_secret) livekit_api.AccessToken(settings.livekit_api_key, settings.livekit_api_secret)
.with_identity(participant_identity) .with_identity(participant_identity)
@ -49,7 +63,7 @@ async def create_livekit_token(request: Request):
agents=[ agents=[
livekit_api.RoomAgentDispatch( livekit_api.RoomAgentDispatch(
agent_name="voice-agent", agent_name="voice-agent",
metadata=json.dumps({"auth_header": auth_header}), metadata=json.dumps(metadata),
) )
], ],
) )