From 6e832c761574baa5b2b7249ca14392186e09e920 Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 24 Feb 2026 05:16:10 -0800 Subject: [PATCH] feat: add voice I/O test page in Flutter settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TTS: text input → Kokoro synthesis → audio playback - STT: long-press record → faster-whisper transcription - Round-trip: record → STT → TTS → playback - Added /api/v1/test route to Kong gateway for voice-service - Accessible from Settings → 语音 I/O 测试 Co-Authored-By: Claude Opus 4.6 --- .../presentation/pages/voice_test_page.dart | 360 ++++++++++++++++++ .../presentation/pages/settings_page.dart | 20 + packages/gateway/config/kong.yml | 4 + 3 files changed, 384 insertions(+) create mode 100644 it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart diff --git a/it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart b/it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart new file mode 100644 index 0000000..739f07e --- /dev/null +++ b/it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart @@ -0,0 +1,360 @@ +import 'dart:async'; +import 'dart:typed_data'; +import 'package:flutter/material.dart'; +import 'package:flutter_riverpod/flutter_riverpod.dart'; +import 'package:dio/dio.dart'; +import 'package:record/record.dart'; +import 'package:audioplayers/audioplayers.dart'; +import '../../../../core/network/dio_client.dart'; + +/// Temporary voice I/O test page — TTS + STT + Round-trip. 
+class VoiceTestPage extends ConsumerStatefulWidget {
+  const VoiceTestPage({super.key});
+
+  @override
+  ConsumerState<VoiceTestPage> createState() => _VoiceTestPageState();
+}
+
+/// State for [VoiceTestPage].
+///
+/// Drives three manual checks against the `/api/v1/test` endpoints: TTS
+/// (text to audio playback), STT (recording to transcript) and a round trip
+/// that chains both. Every `setState` that follows an `await` is guarded by
+/// `mounted`, because the page can be popped while a request or a recording
+/// is still in flight.
+class _VoiceTestPageState extends ConsumerState<VoiceTestPage> {
+  static const _ttsPath = '/api/v1/test/tts/synthesize';
+  static const _sttPath = '/api/v1/test/stt/transcribe';
+
+  final _ttsController = TextEditingController(
+    text: '你好,我是IT0运维助手。很高兴为您服务!',
+  );
+  final _audioPlayer = AudioPlayer();
+  final _recorder = AudioRecorder();
+
+  String _ttsStatus = '';
+  String _sttStatus = '';
+  String _sttResult = '';
+  String _rtStatus = '';
+  String _rtResult = '';
+  bool _isRecording = false;
+  bool _isSynthesizing = false;
+  String _recordMode = ''; // 'stt' or 'rt'
+
+  /// Builds a fresh Dio for binary responses (TTS audio).
+  ///
+  /// The client copies the base URL, headers and interceptors of the shared
+  /// client and switches the response type to raw bytes. Each access creates
+  /// a new instance, so the caller owns it and must `close()` it when done.
+  Dio get _dioBinary {
+    final base = ref.read(dioClientProvider);
+    return Dio(BaseOptions(
+      baseUrl: base.options.baseUrl,
+      headers: Map<String, dynamic>.from(base.options.headers),
+      connectTimeout: const Duration(seconds: 30),
+      receiveTimeout: const Duration(seconds: 60),
+      responseType: ResponseType.bytes,
+    ))
+      ..interceptors.addAll(base.interceptors);
+  }
+
+  /// Dio for JSON responses (STT).
+  Dio get _dioJson => ref.read(dioClientProvider);
+
+  @override
+  void dispose() {
+    _ttsController.dispose();
+    _audioPlayer.dispose();
+    _recorder.dispose();
+    super.dispose();
+  }
+
+  /// Formats [byteCount] as kilobytes with one decimal place.
+  static String _kb(int byteCount) => (byteCount / 1024).toStringAsFixed(1);
+
+  /// Writes [status] to the status line that belongs to [mode].
+  ///
+  /// Must be called from inside a `setState` callback.
+  void _setRecordStatus(String mode, String status) {
+    if (mode == 'stt') {
+      _sttStatus = status;
+    } else {
+      _rtStatus = status;
+    }
+  }
+
+  /// Requests synthesized audio for [text] and returns the raw bytes.
+  ///
+  /// The temporary binary client is closed in `finally` so that repeated
+  /// runs do not accumulate open HTTP clients.
+  Future<Uint8List> _synthesize(String text) async {
+    final dio = _dioBinary;
+    try {
+      final resp = await dio.get<List<int>>(
+        _ttsPath,
+        queryParameters: {'text': text},
+      );
+      return Uint8List.fromList(resp.data ?? const <int>[]);
+    } finally {
+      dio.close();
+    }
+  }
+
+  /// Uploads the recording at [audioPath] and returns the decoded JSON body.
+  ///
+  /// Returns an empty map when the response carries no body.
+  Future<Map<String, dynamic>> _transcribe(String audioPath) async {
+    final formData = FormData.fromMap({
+      'audio': await MultipartFile.fromFile(
+        audioPath,
+        filename: 'recording.wav',
+      ),
+    });
+    final resp = await _dioJson.post<Map<String, dynamic>>(
+      _sttPath,
+      data: formData,
+    );
+    return resp.data ?? const <String, dynamic>{};
+  }
+
+  // ---- TTS ----
+  /// Synthesizes the text in the input field and plays the returned audio.
+  ///
+  /// Does nothing when the trimmed input is empty. Failures are shown in the
+  /// TTS status line instead of being rethrown.
+  Future<void> _doTTS() async {
+    final text = _ttsController.text.trim();
+    if (text.isEmpty) return;
+    setState(() {
+      _isSynthesizing = true;
+      _ttsStatus = '合成中...';
+    });
+    final sw = Stopwatch()..start();
+    try {
+      final bytes = await _synthesize(text);
+      sw.stop();
+      if (!mounted) return;
+      setState(() {
+        _ttsStatus = '完成!耗时 ${sw.elapsedMilliseconds}ms,大小 ${_kb(bytes.length)}KB';
+      });
+      await _audioPlayer.play(BytesSource(bytes));
+    } catch (e) {
+      // UI boundary: any failure (network, decoding, playback) is reported
+      // to the tester rather than propagated.
+      sw.stop();
+      if (mounted) setState(() => _ttsStatus = '错误: $e');
+    } finally {
+      if (mounted) setState(() => _isSynthesizing = false);
+    }
+  }
+
+  // ---- Recording ----
+  /// Starts a 16 kHz mono WAV recording for [mode] (`'stt'` or `'rt'`).
+  ///
+  /// Ignored while a recording is already running. A denied microphone
+  /// permission or a failing recorder is reported in the status line of
+  /// [mode]; on failure the recording flag is reset so the buttons keep
+  /// working.
+  Future<void> _startRecording(String mode) async {
+    if (_isRecording) return;
+    final hasPermission = await _recorder.hasPermission();
+    if (!mounted) return;
+    if (!hasPermission) {
+      setState(() => _setRecordStatus(mode, '麦克风权限被拒绝'));
+      return;
+    }
+    setState(() {
+      _recordMode = mode;
+      _isRecording = true;
+      _setRecordStatus(mode, '录音中...');
+      if (mode == 'stt') {
+        _sttResult = '';
+      } else {
+        _rtResult = '';
+      }
+    });
+    try {
+      await _recorder.start(
+        const RecordConfig(
+          encoder: AudioEncoder.wav,
+          sampleRate: 16000,
+          numChannels: 1,
+          bitRate: 256000,
+        ),
+        // NOTE(review): the empty path is meant to select a temp file;
+        // confirm the installed `record` version accepts it on every
+        // target platform.
+        path: '', // temp file
+      );
+    } catch (e) {
+      if (!mounted) return;
+      setState(() {
+        _isRecording = false;
+        _setRecordStatus(mode, '错误: $e');
+      });
+    }
+  }
+
+  /// Stops the running recording and hands the file to the active mode.
+  ///
+  /// Does nothing when no recording is running or the recorder returns no
+  /// path.
+  Future<void> _stopRecording() async {
+    if (!_isRecording) return;
+    String? path;
+    try {
+      path = await _recorder.stop();
+    } catch (e) {
+      if (mounted) {
+        setState(() {
+          _isRecording = false;
+          _setRecordStatus(_recordMode, '错误: $e');
+        });
+      }
+      return;
+    }
+    if (!mounted) return;
+    setState(() => _isRecording = false);
+    if (path == null || path.isEmpty) return;
+
+    if (_recordMode == 'stt') {
+      await _doSTT(path);
+    } else {
+      await _doRoundTrip(path);
+    }
+  }
+
+  // ---- STT ----
+  /// Transcribes the recording at [audioPath] and shows text and timing.
+  Future<void> _doSTT(String audioPath) async {
+    setState(() => _sttStatus = '识别中...');
+    final sw = Stopwatch()..start();
+    try {
+      final data = await _transcribe(audioPath);
+      sw.stop();
+      if (!mounted) return;
+      final text = data['text']?.toString() ?? '(empty)';
+      setState(() {
+        _sttResult = text;
+        _sttStatus = '完成!耗时 ${sw.elapsedMilliseconds}ms,时长 ${data['duration'] ?? 0}s';
+      });
+    } catch (e) {
+      sw.stop();
+      if (!mounted) return;
+      setState(() {
+        _sttStatus = '错误: $e';
+        _sttResult = '';
+      });
+    }
+  }
+
+  // ---- Round-trip: STT → TTS ----
+  /// Transcribes the recording at [audioPath], then synthesizes and plays
+  /// the recognized text, reporting the latency of each stage.
+  ///
+  /// Stops after the STT stage when no text was recognized.
+  Future<void> _doRoundTrip(String audioPath) async {
+    setState(() => _rtStatus = 'STT 识别中...');
+    final totalSw = Stopwatch()..start();
+    try {
+      // 1. STT
+      final sttSw = Stopwatch()..start();
+      final sttData = await _transcribe(audioPath);
+      sttSw.stop();
+      if (!mounted) return;
+      final text = sttData['text']?.toString() ?? '';
+      setState(() {
+        _rtResult = 'STT (${sttSw.elapsedMilliseconds}ms): $text';
+        _rtStatus = text.isEmpty ? 'STT 识别为空' : 'TTS 合成中...';
+      });
+      if (text.isEmpty) return;
+
+      // 2. TTS
+      final ttsSw = Stopwatch()..start();
+      final audioBytes = await _synthesize(text);
+      ttsSw.stop();
+      totalSw.stop();
+      if (!mounted) return;
+      setState(() {
+        _rtResult += '\nTTS (${ttsSw.elapsedMilliseconds}ms): ${_kb(audioBytes.length)}KB';
+        _rtStatus = '完成!STT=${sttSw.elapsedMilliseconds}ms + TTS=${ttsSw.elapsedMilliseconds}ms = ${totalSw.elapsedMilliseconds}ms';
+      });
+      await _audioPlayer.play(BytesSource(audioBytes));
+    } catch (e) {
+      totalSw.stop();
+      if (!mounted) return;
+      setState(() {
+        _rtStatus = '错误: $e';
+      });
+    }
+  }
+
+  @override
+  Widget build(BuildContext context) {
+    return Scaffold(
+      appBar: AppBar(title: const Text('语音 I/O 测试')),
+      body: ListView(
+        padding: const EdgeInsets.all(16),
+        children: [
+          // TTS Section
+          _buildSection(
+            title: 'TTS (文本转语音)',
+            child: Column(
+              crossAxisAlignment: CrossAxisAlignment.start,
+              children: [
+                TextField(
+                  controller: _ttsController,
+                  maxLines: 3,
+                  decoration: const InputDecoration(
+                    hintText: '输入要合成的文本...',
+                    border: OutlineInputBorder(),
+                  ),
+                ),
+                const SizedBox(height: 8),
+                ElevatedButton.icon(
+                  onPressed: _isSynthesizing ? null : _doTTS,
+                  icon: _isSynthesizing
+                      ? const SizedBox(
+                          width: 16,
+                          height: 16,
+                          child: CircularProgressIndicator(strokeWidth: 2),
+                        )
+                      : const Icon(Icons.volume_up),
+                  label: Text(_isSynthesizing ? '合成中...' : '合成语音'),
+                ),
+                if (_ttsStatus.isNotEmpty) _buildStatus(_ttsStatus),
+              ],
+            ),
+          ),
+          const SizedBox(height: 16),
+
+          // STT Section
+          _buildSection(
+            title: 'STT (语音转文本)',
+            child: Column(
+              crossAxisAlignment: CrossAxisAlignment.start,
+              children: [
+                _buildRecordButton(
+                  mode: 'stt',
+                  idleIcon: Icons.mic_none,
+                  idleLabel: '长按录音',
+                ),
+                if (_sttStatus.isNotEmpty) _buildStatus(_sttStatus),
+                if (_sttResult.isNotEmpty)
+                  _buildResult(_sttResult, fontSize: 16),
+              ],
+            ),
+          ),
+          const SizedBox(height: 16),
+
+          // Round-trip Section
+          _buildSection(
+            title: 'Round-trip (STT + TTS)',
+            subtitle: '录音 → 识别文本 → 合成语音播放',
+            child: Column(
+              crossAxisAlignment: CrossAxisAlignment.start,
+              children: [
+                _buildRecordButton(
+                  mode: 'rt',
+                  idleIcon: Icons.swap_horiz,
+                  idleLabel: '长按说话 (Round-trip)',
+                ),
+                if (_rtStatus.isNotEmpty) _buildStatus(_rtStatus),
+                if (_rtResult.isNotEmpty)
+                  _buildResult(_rtResult, fontSize: 14),
+              ],
+            ),
+          ),
+        ],
+      ),
+    );
+  }
+
+  /// Builds the hold-to-record button for [mode].
+  ///
+  /// The button turns red and shows the microphone icon while a recording
+  /// for this [mode] is running; otherwise it shows [idleIcon] and
+  /// [idleLabel]. The long press on the surrounding detector starts and
+  /// stops the recording; the button's own tap handler is a no-op that only
+  /// keeps it rendered as enabled.
+  Widget _buildRecordButton({
+    required String mode,
+    required IconData idleIcon,
+    required String idleLabel,
+  }) {
+    final active = _isRecording && _recordMode == mode;
+    return GestureDetector(
+      onLongPressStart: (_) => _startRecording(mode),
+      onLongPressEnd: (_) => _stopRecording(),
+      child: ElevatedButton.icon(
+        onPressed: () {},
+        style: ElevatedButton.styleFrom(
+          backgroundColor: active ? Colors.red : null,
+        ),
+        icon: Icon(active ? Icons.mic : idleIcon),
+        label: Text(active ? '录音中... 松开结束' : idleLabel),
+      ),
+    );
+  }
+
+  /// Builds the small grey status line shown under a section's controls.
+  Widget _buildStatus(String status) {
+    return Padding(
+      padding: const EdgeInsets.only(top: 8),
+      child: Text(
+        status,
+        style: TextStyle(color: Colors.grey[600], fontSize: 13),
+      ),
+    );
+  }
+
+  /// Builds the full-width rounded box that displays a result [text].
+  Widget _buildResult(String text, {required double fontSize}) {
+    return Container(
+      width: double.infinity,
+      margin: const EdgeInsets.only(top: 8),
+      padding: const EdgeInsets.all(12),
+      decoration: BoxDecoration(
+        color: Colors.grey[100],
+        borderRadius: BorderRadius.circular(8),
+      ),
+      child: Text(text, style: TextStyle(fontSize: fontSize)),
+    );
+  }
+
+  /// Wraps [child] in a card with a bold [title] and optional [subtitle].
+  Widget _buildSection({
+    required String title,
+    String? subtitle,
+    required Widget child,
+  }) {
+    return Card(
+      child: Padding(
+        padding: const EdgeInsets.all(16),
+        child: Column(
+          crossAxisAlignment: CrossAxisAlignment.start,
+          children: [
+            Text(
+              title,
+              style: const TextStyle(fontSize: 16, fontWeight: FontWeight.bold),
+            ),
+            if (subtitle != null)
+              Padding(
+                padding: const EdgeInsets.only(top: 4),
+                child: Text(
+                  subtitle,
+                  style: TextStyle(fontSize: 12, color: Colors.grey[500]),
+                ),
+              ),
+            const SizedBox(height: 12),
+            child,
+          ],
+        ),
+      ),
+    );
+  }
+}
diff --git a/it0_app/lib/features/settings/presentation/pages/settings_page.dart b/it0_app/lib/features/settings/presentation/pages/settings_page.dart index 291cf5b..099d97e 100644 --- a/it0_app/lib/features/settings/presentation/pages/settings_page.dart +++ b/it0_app/lib/features/settings/presentation/pages/settings_page.dart @@ -3,6 +3,7 @@ import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:go_router/go_router.dart'; import '../../../../core/theme/app_colors.dart'; import '../../../auth/data/providers/auth_provider.dart'; +import '../../../agent_call/presentation/pages/voice_test_page.dart'; import '../providers/settings_providers.dart'; class SettingsPage extends ConsumerStatefulWidget { @@ -132,6 +133,25 @@ class _SettingsPageState extends ConsumerState { ), ], ), + const SizedBox(height: 24), + + // ===== Dev / Debug Group ===== +
_SettingsGroup( + cardColor: cardColor, + children: [ + _SettingsRow( + icon: Icons.record_voice_over, + iconBg: const Color(0xFF10B981), + title: '语音 I/O 测试', + trailing: Text('TTS / STT', + style: TextStyle(color: subtitleColor, fontSize: 14)), + onTap: () => Navigator.push( + context, + MaterialPageRoute(builder: (_) => const VoiceTestPage()), + ), + ), + ], + ), const SizedBox(height: 32), // ===== Logout ===== diff --git a/packages/gateway/config/kong.yml b/packages/gateway/config/kong.yml index 7e4b8fd..648f9b5 100644 --- a/packages/gateway/config/kong.yml +++ b/packages/gateway/config/kong.yml @@ -100,6 +100,10 @@ services: paths: - /api/v1/twilio strip_path: false + - name: voice-test + paths: + - /api/v1/test + strip_path: false - name: audit-service url: http://audit-service:3007