feat: add voice I/O test page in Flutter settings

- TTS: text input → Kokoro synthesis → audio playback
- STT: long-press record → faster-whisper transcription
- Round-trip: record → STT → TTS → playback
- Added /api/v1/test route to Kong gateway for voice-service
- Accessible from Settings → 语音 I/O 测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 05:16:10 -08:00 · 2026-02-24 05:16:10 -08:00 · 6e832c7615
parent 0bd050c80f
commit 6e832c7615
3 changed files with 384 additions and 0 deletions
--- a/it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart
+++ b/it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart
@ -0,0 +1,360 @@
 import 'dart:async';
 import 'dart:typed_data';
 import 'package:flutter/material.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
 import 'package:dio/dio.dart';
 import 'package:record/record.dart';
 import 'package:audioplayers/audioplayers.dart';
 import '../../../../core/network/dio_client.dart';
 /// Throwaway diagnostics screen for the voice pipeline.
 ///
 /// Offers three manual checks: text-to-speech, speech-to-text, and a round
 /// trip that feeds the recognized text back into synthesis.
 class VoiceTestPage extends ConsumerStatefulWidget {
  const VoiceTestPage({super.key});

  @override
  ConsumerState<VoiceTestPage> createState() {
    return _VoiceTestPageState();
  }
 }
 class _VoiceTestPageState extends ConsumerState<VoiceTestPage> {
  /// Controller for the TTS text field, pre-filled with a sample sentence.
  final _ttsController = TextEditingController(
    text: '你好，我是IT0运维助手。很高兴为您服务！',
  );

  /// Plays the audio bytes returned by the TTS endpoint.
  final _audioPlayer = AudioPlayer();

  /// Records microphone input for the STT and round-trip tests.
  final _recorder = AudioRecorder();

  /// Status line rendered under the TTS button (empty hides it).
  String _ttsStatus = '';

  /// Status line rendered under the STT button (empty hides it).
  String _sttStatus = '';

  /// Transcribed text from the last STT run (empty hides the result box).
  String _sttResult = '';

  /// Status line rendered under the round-trip button (empty hides it).
  String _rtStatus = '';

  /// Per-stage summary of the last round-trip run (empty hides the box).
  String _rtResult = '';

  /// Whether a recording is currently in progress.
  bool _isRecording = false;

  /// Whether a TTS request started from the TTS button is in flight; the
  /// button is disabled while this is true.
  bool _isSynthesizing = false;

  /// Which section started the current/last recording; decides where the
  /// captured audio is sent once recording stops.
  String _recordMode = ''; // 'stt' or 'rt'
  /// Builds a [Dio] client that returns raw bytes (used for TTS audio).
  ///
  /// The base URL, a copy of the default headers and the interceptors are
  /// taken from the shared client exposed by [dioClientProvider]. A fresh
  /// instance is created on every access.
  Dio get _dioBinary {
    final shared = ref.read(dioClientProvider);
    final options = BaseOptions(
      baseUrl: shared.options.baseUrl,
      headers: Map.from(shared.options.headers),
      connectTimeout: const Duration(seconds: 30),
      receiveTimeout: const Duration(seconds: 60),
      responseType: ResponseType.bytes,
    );
    final client = Dio(options);
    client.interceptors.addAll(shared.interceptors);
    return client;
  }

  /// The shared [Dio] client, used unchanged for JSON responses (STT).
  Dio get _dioJson {
    return ref.read(dioClientProvider);
  }
  /// Releases the text controller, the audio player and the recorder.
  @override
  void dispose() {
    _ttsController.dispose();
    // The player and recorder tear down asynchronously; dispose() is
    // synchronous, so the futures are deliberately not awaited.
    unawaited(_audioPlayer.dispose());
    unawaited(_recorder.dispose());
    super.dispose();
  }
  // ---- TTS ----
  /// Synthesizes the text in the TTS field and plays the returned audio.
  ///
  /// Does nothing when the trimmed text is empty. Progress, timing and any
  /// error are reported through [_ttsStatus]; [_isSynthesizing] is true
  /// while the request is in flight.
  Future<void> _doTTS() async {
    final text = _ttsController.text.trim();
    if (text.isEmpty) return;
    setState(() {
      _isSynthesizing = true;
      _ttsStatus = '合成中...';
    });
    // _dioBinary hands out a new client on every access, so keep a handle
    // to be able to close it once the request is done.
    final client = _dioBinary;
    final sw = Stopwatch()..start();
    try {
      final resp = await client.get(
        '/api/v1/test/tts/synthesize',
        queryParameters: {'text': text},
      );
      sw.stop();
      // The page may have been popped while the request was running;
      // setState on a disposed State throws, and playback would be pointless.
      if (!mounted) return;
      final bytes = resp.data as List<int>;
      setState(() {
        _ttsStatus = '完成！耗时 ${sw.elapsedMilliseconds}ms，大小 ${(bytes.length / 1024).toStringAsFixed(1)}KB';
      });
      await _audioPlayer.play(BytesSource(Uint8List.fromList(bytes)));
    } catch (e) {
      sw.stop();
      if (mounted) setState(() => _ttsStatus = '错误: $e');
    } finally {
      client.close();
      if (mounted) setState(() => _isSynthesizing = false);
    }
  }
  // ---- Recording ----
  /// Starts capturing microphone audio for the given test [mode].
  ///
  /// [mode] is `'stt'` or `'rt'` and selects which section's status/result
  /// fields are updated. Does nothing if a recording is already in progress.
  /// When microphone permission is denied, or the recorder fails to start,
  /// the reason is written to the matching status field and the recording
  /// flag ends up cleared.
  Future<void> _startRecording(String mode) async {
    if (_isRecording) return;
    final hasPermission = await _recorder.hasPermission();
    // The permission check is asynchronous: the page may be gone by now, or
    // another long-press may already have started a recording.
    if (!mounted || _isRecording) return;
    if (!hasPermission) {
      setState(() {
        if (mode == 'stt') {
          _sttStatus = '麦克风权限被拒绝';
        } else {
          _rtStatus = '麦克风权限被拒绝';
        }
      });
      return;
    }
    _recordMode = mode;
    setState(() {
      _isRecording = true;
      if (mode == 'stt') {
        _sttStatus = '录音中...';
        _sttResult = '';
      } else {
        _rtStatus = '录音中...';
        _rtResult = '';
      }
    });
    try {
      await _recorder.start(
        const RecordConfig(
          encoder: AudioEncoder.wav,
          sampleRate: 16000,
          numChannels: 1,
          bitRate: 256000,
        ),
        // NOTE(review): an empty path is meant to yield a temp file — confirm
        // the record plugin behaves that way on every target platform.
        path: '',
      );
    } catch (e) {
      // Without this the UI would stay stuck in the "recording" state and
      // every later long-press would be ignored.
      if (!mounted) return;
      setState(() {
        _isRecording = false;
        if (mode == 'stt') {
          _sttStatus = '错误: $e';
        } else {
          _rtStatus = '错误: $e';
        }
      });
    }
  }
  /// Stops the active recording and hands the captured file to the pipeline
  /// selected by [_recordMode] (STT only, or the STT → TTS round trip).
  ///
  /// Does nothing if no recording is in progress or if the recorder returns
  /// no output path. A failure while stopping is reported in the matching
  /// status field.
  Future<void> _stopRecording() async {
    if (!_isRecording) return;
    String? path;
    try {
      path = await _recorder.stop();
    } catch (e) {
      if (mounted) {
        setState(() {
          if (_recordMode == 'stt') {
            _sttStatus = '错误: $e';
          } else {
            _rtStatus = '错误: $e';
          }
        });
      }
    } finally {
      // Always leave the "recording" state, even when stop() failed.
      if (mounted) setState(() => _isRecording = false);
    }
    if (!mounted || path == null || path.isEmpty) return;
    // Await the follow-up so the returned future covers the whole pipeline
    // instead of silently dropping it.
    if (_recordMode == 'stt') {
      await _doSTT(path);
    } else {
      await _doRoundTrip(path);
    }
  }
  // ---- STT ----
  /// Uploads the recording at [audioPath] to the STT endpoint and shows the
  /// transcript.
  ///
  /// The transcript goes to [_sttResult] (`'(empty)'` when the service
  /// returns no text); timing and errors go to [_sttStatus].
  Future<void> _doSTT(String audioPath) async {
    if (!mounted) return;
    setState(() => _sttStatus = '识别中...');
    final sw = Stopwatch()..start();
    try {
      // Resolve the client before the first await so `ref` is not touched
      // after the page may have been disposed.
      final client = _dioJson;
      final formData = FormData.fromMap({
        'audio': await MultipartFile.fromFile(audioPath, filename: 'recording.wav'),
      });
      final resp = await client.post('/api/v1/test/stt/transcribe', data: formData);
      sw.stop();
      // The page may have been popped while the request was running.
      if (!mounted) return;
      final data = resp.data as Map<String, dynamic>;
      final Object? rawText = data['text'];
      final text = rawText?.toString() ?? '';
      setState(() {
        // Treat an empty string like a missing one so the result box still
        // appears instead of silently showing nothing.
        _sttResult = text.isEmpty ? '(empty)' : text;
        _sttStatus = '完成！耗时 ${sw.elapsedMilliseconds}ms，时长 ${data['duration'] ?? 0}s';
      });
    } catch (e) {
      sw.stop();
      if (!mounted) return;
      setState(() {
        _sttStatus = '错误: $e';
        _sttResult = '';
      });
    }
  }
  // ---- Round-trip: STT → TTS ----
  /// Runs the full round trip for the recording at [audioPath]: transcribe
  /// it, synthesize the transcript, then play the synthesized audio.
  ///
  /// Per-stage results are accumulated in [_rtResult]; progress, timings and
  /// errors go to [_rtStatus]. Stops after the STT stage when the transcript
  /// is empty.
  Future<void> _doRoundTrip(String audioPath) async {
    if (!mounted) return;
    setState(() => _rtStatus = 'STT 识别中...');
    // Resolve both clients up front so `ref` is not touched after an await.
    // The binary client is created per access and must be closed when done.
    final sttClient = _dioJson;
    final ttsClient = _dioBinary;
    final totalSw = Stopwatch()..start();
    try {
      // 1. STT
      final sttSw = Stopwatch()..start();
      final formData = FormData.fromMap({
        'audio': await MultipartFile.fromFile(audioPath, filename: 'recording.wav'),
      });
      final sttResp = await sttClient.post('/api/v1/test/stt/transcribe', data: formData);
      sttSw.stop();
      // The page may have been popped while the request was running.
      if (!mounted) return;
      final sttData = sttResp.data as Map<String, dynamic>;
      final Object? rawText = sttData['text'];
      final text = rawText?.toString() ?? '';
      setState(() {
        _rtResult = 'STT (${sttSw.elapsedMilliseconds}ms): $text';
        _rtStatus = text.isEmpty ? 'STT 识别为空' : 'TTS 合成中...';
      });
      if (text.isEmpty) return;
      // 2. TTS
      final ttsSw = Stopwatch()..start();
      final ttsResp = await ttsClient.get(
        '/api/v1/test/tts/synthesize',
        queryParameters: {'text': text},
      );
      ttsSw.stop();
      totalSw.stop();
      if (!mounted) return;
      final audioBytes = ttsResp.data as List<int>;
      setState(() {
        _rtResult += '\nTTS (${ttsSw.elapsedMilliseconds}ms): ${(audioBytes.length / 1024).toStringAsFixed(1)}KB';
        _rtStatus = '完成！STT=${sttSw.elapsedMilliseconds}ms + TTS=${ttsSw.elapsedMilliseconds}ms = ${totalSw.elapsedMilliseconds}ms';
      });
      await _audioPlayer.play(BytesSource(Uint8List.fromList(audioBytes)));
    } catch (e) {
      totalSw.stop();
      if (!mounted) return;
      setState(() {
        _rtStatus = '错误: $e';
      });
    } finally {
      ttsClient.close();
    }
  }
  /// Lays out the three test cards (TTS, STT, round trip) in a scrolling
  /// list.
  @override
  Widget build(BuildContext context) {
    final ttsCard = _buildSection(
      title: 'TTS (文本转语音)',
      child: Column(
        crossAxisAlignment: CrossAxisAlignment.start,
        children: [
          TextField(
            controller: _ttsController,
            maxLines: 3,
            decoration: const InputDecoration(
              hintText: '输入要合成的文本...',
              border: OutlineInputBorder(),
            ),
          ),
          const SizedBox(height: 8),
          ElevatedButton.icon(
            // Disabled while a synthesis request is already running.
            onPressed: _isSynthesizing ? null : _doTTS,
            icon: _isSynthesizing
                ? const SizedBox(
                    width: 16,
                    height: 16,
                    child: CircularProgressIndicator(strokeWidth: 2),
                  )
                : const Icon(Icons.volume_up),
            label: Text(_isSynthesizing ? '合成中...' : '合成语音'),
          ),
          if (_ttsStatus.isNotEmpty) _buildStatusLine(_ttsStatus),
        ],
      ),
    );

    final sttCard = _buildSection(
      title: 'STT (语音转文本)',
      child: Column(
        crossAxisAlignment: CrossAxisAlignment.start,
        children: [
          _buildRecordButton(
            mode: 'stt',
            idleIcon: Icons.mic_none,
            idleLabel: '长按录音',
          ),
          if (_sttStatus.isNotEmpty) _buildStatusLine(_sttStatus),
          if (_sttResult.isNotEmpty) _buildResultBox(_sttResult, fontSize: 16),
        ],
      ),
    );

    final roundTripCard = _buildSection(
      title: 'Round-trip (STT + TTS)',
      subtitle: '录音 → 识别文本 → 合成语音播放',
      child: Column(
        crossAxisAlignment: CrossAxisAlignment.start,
        children: [
          _buildRecordButton(
            mode: 'rt',
            idleIcon: Icons.swap_horiz,
            idleLabel: '长按说话 (Round-trip)',
          ),
          if (_rtStatus.isNotEmpty) _buildStatusLine(_rtStatus),
          if (_rtResult.isNotEmpty) _buildResultBox(_rtResult, fontSize: 14),
        ],
      ),
    );

    return Scaffold(
      appBar: AppBar(title: const Text('语音 I/O 测试')),
      body: ListView(
        padding: const EdgeInsets.all(16),
        children: [
          ttsCard,
          const SizedBox(height: 16),
          sttCard,
          const SizedBox(height: 16),
          roundTripCard,
        ],
      ),
    );
  }

  /// Builds a press-and-hold record button for the given recording [mode].
  ///
  /// While a recording started in this [mode] is active the button turns red
  /// and shows the microphone icon; otherwise it shows [idleIcon] and
  /// [idleLabel].
  Widget _buildRecordButton({
    required String mode,
    required IconData idleIcon,
    required String idleLabel,
  }) {
    final active = _isRecording && _recordMode == mode;
    return GestureDetector(
      onLongPressStart: (_) => _startRecording(mode),
      onLongPressEnd: (_) => _stopRecording(),
      child: ElevatedButton.icon(
        // No tap action; the empty handler keeps the button rendered as
        // enabled while the surrounding detector handles the long-press.
        onPressed: () {},
        style: ElevatedButton.styleFrom(
          backgroundColor: active ? Colors.red : null,
        ),
        icon: Icon(active ? Icons.mic : idleIcon),
        label: Text(active ? '录音中... 松开结束' : idleLabel),
      ),
    );
  }

  /// Builds the small grey status line shown under a section's button.
  Widget _buildStatusLine(String status) {
    return Padding(
      padding: const EdgeInsets.only(top: 8),
      child: Text(
        status,
        style: TextStyle(color: Colors.grey[600], fontSize: 13),
      ),
    );
  }

  /// Builds the full-width, rounded grey box that displays a result [text].
  Widget _buildResultBox(String text, {required double fontSize}) {
    return Container(
      width: double.infinity,
      margin: const EdgeInsets.only(top: 8),
      padding: const EdgeInsets.all(12),
      decoration: BoxDecoration(
        color: Colors.grey[100],
        borderRadius: BorderRadius.circular(8),
      ),
      child: Text(text, style: TextStyle(fontSize: fontSize)),
    );
  }
  /// Wraps [child] in a padded [Card] headed by a bold [title].
  ///
  /// When [subtitle] is provided it is rendered in small grey text directly
  /// beneath the title.
  Widget _buildSection({
    required String title,
    String? subtitle,
    required Widget child,
  }) {
    final heading = Text(
      title,
      style: const TextStyle(fontSize: 16, fontWeight: FontWeight.bold),
    );
    final rows = <Widget>[
      heading,
      if (subtitle != null)
        Padding(
          padding: const EdgeInsets.only(top: 4),
          child: Text(
            subtitle,
            style: TextStyle(fontSize: 12, color: Colors.grey[500]),
          ),
        ),
      const SizedBox(height: 12),
      child,
    ];
    return Card(
      child: Padding(
        padding: const EdgeInsets.all(16),
        child: Column(
          crossAxisAlignment: CrossAxisAlignment.start,
          children: rows,
        ),
      ),
    );
  }
 }
--- a/it0_app/lib/features/settings/presentation/pages/settings_page.dart
+++ b/it0_app/lib/features/settings/presentation/pages/settings_page.dart
@ -3,6 +3,7 @@ import 'package:flutter_riverpod/flutter_riverpod.dart';
 import 'package:go_router/go_router.dart';
 import '../../../../core/theme/app_colors.dart';
 import '../../../auth/data/providers/auth_provider.dart';
 import '../../../agent_call/presentation/pages/voice_test_page.dart';
 import '../providers/settings_providers.dart';
 class SettingsPage extends ConsumerStatefulWidget {
@ -132,6 +133,25 @@ class _SettingsPageState extends ConsumerState<SettingsPage> {
                ),
            ],
          ),
          const SizedBox(height: 24),
          // ===== Dev / Debug Group =====
          _SettingsGroup(
            cardColor: cardColor,
            children: [
              _SettingsRow(
                icon: Icons.record_voice_over,
                iconBg: const Color(0xFF10B981),
                title: '语音 I/O 测试',
                trailing: Text('TTS / STT',
                    style: TextStyle(color: subtitleColor, fontSize: 14)),
                onTap: () => Navigator.push(
                  context,
                  MaterialPageRoute(builder: (_) => const VoiceTestPage()),
                ),
              ),
            ],
          ),
          const SizedBox(height: 32),
          // ===== Logout =====
--- a/packages/gateway/config/kong.yml
+++ b/packages/gateway/config/kong.yml
@ -100,6 +100,10 @@ services:
        paths:
          - /api/v1/twilio
        strip_path: false
      # Route for the voice I/O test endpoints (/api/v1/test/...), called by
      # the Flutter voice test page. strip_path: false forwards the request
      # path to the upstream unchanged.
      - name: voice-test
        paths:
          - /api/v1/test
        strip_path: false
  - name: audit-service
    url: http://audit-service:3007