From 6e832c761574baa5b2b7249ca14392186e09e920 Mon Sep 17 00:00:00 2001
From: hailin <hailin.zhao@gdzx.xyz>
Date: Tue, 24 Feb 2026 05:16:10 -0800
Subject: [PATCH] feat: add voice I/O test page in Flutter settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- TTS: text input → Kokoro synthesis → audio playback
- STT: long-press record → faster-whisper transcription
- Round-trip: record → STT → TTS → playback
- Added /api/v1/test route to Kong gateway for voice-service
- Accessible from Settings → 语音 I/O 测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../presentation/pages/voice_test_page.dart   | 360 ++++++++++++++++++
 .../presentation/pages/settings_page.dart     |  20 +
 packages/gateway/config/kong.yml              |   4 +
 3 files changed, 384 insertions(+)
 create mode 100644 it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart
diff --git a/it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart b/it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart
new file mode 100644
index 0000000..739f07e
--- /dev/null
+++ b/it0_app/lib/features/agent_call/presentation/pages/voice_test_page.dart
@@ -0,0 +1,360 @@
+import 'dart:async';
+import 'dart:typed_data';
+import 'package:flutter/material.dart';
+import 'package:flutter_riverpod/flutter_riverpod.dart';
+import 'package:dio/dio.dart';
+import 'package:record/record.dart';
+import 'package:audioplayers/audioplayers.dart';
+import '../../../../core/network/dio_client.dart';
+
+/// Temporary voice I/O test page with three cards: TTS, STT and round-trip.
+class VoiceTestPage extends ConsumerStatefulWidget {
+  const VoiceTestPage({super.key});
+
+  @override
+  ConsumerState<VoiceTestPage> createState() => _VoiceTestPageState();
+}
+
+class _VoiceTestPageState extends ConsumerState<VoiceTestPage> {
+  final _ttsController = TextEditingController(
+    text: '你好，我是IT0运维助手。很高兴为您服务！', // default sample sentence
+  );
+  final _audioPlayer = AudioPlayer(); // plays synthesized TTS audio
+  final _recorder = AudioRecorder(); // captures microphone input to a file
+
+  String _ttsStatus = ''; // progress / error text under the TTS button
+  String _sttStatus = ''; // progress / error text under the STT button
+  String _sttResult = ''; // latest transcription shown in the STT card
+  String _rtStatus = ''; // progress / error text under the round-trip button
+  String _rtResult = ''; // STT/TTS timing summary shown in the round-trip card
+  bool _isRecording = false; // true between recording start and stop
+  bool _isSynthesizing = false; // true while a TTS request from _doTTS runs
+  String _recordMode = ''; // 'stt' or 'rt'; which card started the recording
+
+  /// One-off [Dio] copying the shared client's setup but returning raw bytes.
+  Dio get _dioBinary {
+    final shared = ref.read(dioClientProvider);
+    final options = BaseOptions(
+      baseUrl: shared.options.baseUrl,
+      headers: Map.from(shared.options.headers),
+      connectTimeout: const Duration(seconds: 30),
+      receiveTimeout: const Duration(seconds: 60),
+      responseType: ResponseType.bytes);
+    return Dio(options)..interceptors.addAll(shared.interceptors);
+  }
+
+  /// Client from [dioClientProvider], used unchanged for the JSON STT calls.
+  Dio get _dioJson => ref.read(dioClientProvider);
+
+  @override
+  void dispose() {
+    _recorder.dispose(); // microphone capture
+    _audioPlayer.dispose(); // audio playback
+    _ttsController.dispose(); // TTS text field controller
+    super.dispose();
+  }
+
+  // ---- TTS ----
+  /// Synthesizes the trimmed text field content and plays the returned audio.
+  Future<void> _doTTS() async {
+    final text = _ttsController.text.trim();
+    if (text.isEmpty) return;
+    setState(() {
+      _isSynthesizing = true;
+      _ttsStatus = '合成中...';
+    });
+    final sw = Stopwatch()..start();
+    try {
+      final resp = await _dioBinary.get(
+        '/api/v1/test/tts/synthesize',
+        queryParameters: {'text': text},
+      );
+      sw.stop();
+      if (!mounted) return; // page popped mid-request: no setState, no playback
+      final bytes = resp.data as List<int>;
+      final kb = (bytes.length / 1024).toStringAsFixed(1);
+      setState(() => _ttsStatus = '完成！耗时 ${sw.elapsedMilliseconds}ms，大小 ${kb}KB');
+      await _audioPlayer.play(BytesSource(Uint8List.fromList(bytes)));
+    } catch (e) {
+      if (mounted) setState(() => _ttsStatus = '错误: $e');
+    } finally {
+      if (mounted) setState(() => _isSynthesizing = false);
+    }
+  }
+
+  // ---- Recording ----
+  /// Starts a mono 16 kHz WAV capture for [mode] ('stt' or 'rt'); permission
+  /// and start-up failures are reported in that card's status line.
+  Future<void> _startRecording(String mode) async {
+    if (_isRecording) return;
+    void apply(String status, {required bool recording}) {
+      if (!mounted) return; // page may have been popped during an await
+      setState(() {
+        _isRecording = recording;
+        if (mode == 'stt') {
+          _sttStatus = status;
+          if (recording) _sttResult = '';
+        } else {
+          _rtStatus = status;
+          if (recording) _rtResult = '';
+        }
+      });
+    }
+
+    if (!await _recorder.hasPermission()) {
+      apply('麦克风权限被拒绝', recording: false);
+      return;
+    }
+    if (!mounted) return;
+    _recordMode = mode;
+    apply('录音中...', recording: true);
+    try {
+      await _recorder.start(
+        const RecordConfig(encoder: AudioEncoder.wav, sampleRate: 16000, numChannels: 1, bitRate: 256000),
+        path: '', // temp file — NOTE(review): confirm the record plugin accepts ''
+      );
+    } catch (e) {
+      apply('错误: $e', recording: false); // undo the "recording" state set above
+    }
+  }
+
+  /// Stops the capture and awaits the STT or round-trip flow for the file.
+  Future<void> _stopRecording() async {
+    if (!_isRecording) return;
+    String? path;
+    try {
+      path = await _recorder.stop();
+    } finally {
+      if (mounted) setState(() => _isRecording = false); // reset even if stop() throws
+    }
+    if (!mounted || path == null || path.isEmpty) return;
+    await (_recordMode == 'stt' ? _doSTT(path) : _doRoundTrip(path));
+  }
+
+  // ---- STT ----
+  /// Uploads the recording at [audioPath] and shows the transcription.
+  Future<void> _doSTT(String audioPath) async {
+    setState(() => _sttStatus = '识别中...');
+    final sw = Stopwatch()..start();
+    try {
+      final file = await MultipartFile.fromFile(audioPath, filename: 'recording.wav');
+      final resp = await _dioJson.post('/api/v1/test/stt/transcribe', data: FormData.fromMap({'audio': file}));
+      sw.stop();
+      if (!mounted) return; // page popped while the request was in flight
+      final data = resp.data as Map<String, dynamic>;
+      setState(() {
+        _sttResult = data['text'] ?? '(empty)';
+        _sttStatus = '完成！耗时 ${sw.elapsedMilliseconds}ms，时长 ${data['duration'] ?? 0}s';
+      });
+    } catch (e) {
+      if (!mounted) return;
+      setState(() {
+        _sttStatus = '错误: $e';
+        _sttResult = '';
+      });
+    }
+  }
+
+  // ---- Round-trip: STT → TTS ----
+  /// Transcribes [audioPath], synthesizes the recognized text and plays it.
+  ///
+  /// Per-stage and total latencies are written to the round-trip card; the
+  /// TTS stage is skipped when the transcription is empty.
+  Future<void> _doRoundTrip(String audioPath) async {
+    setState(() => _rtStatus = 'STT 识别中...');
+    final totalSw = Stopwatch()..start();
+    try {
+      // 1. STT
+      final sttSw = Stopwatch()..start();
+      final formData = FormData.fromMap({
+        'audio': await MultipartFile.fromFile(audioPath, filename: 'recording.wav'),
+      });
+      final sttResp = await _dioJson.post('/api/v1/test/stt/transcribe', data: formData);
+      sttSw.stop();
+      if (!mounted) return; // page popped mid-request
+      final sttData = sttResp.data as Map<String, dynamic>;
+      final text = sttData['text']?.toString() ?? ''; // typed, so isEmpty is checked
+      setState(() {
+        _rtResult = 'STT (${sttSw.elapsedMilliseconds}ms): $text';
+        _rtStatus = text.isEmpty ? 'STT 识别为空' : 'TTS 合成中...';
+      });
+      if (text.isEmpty) return;
+
+      // 2. TTS
+      final ttsSw = Stopwatch()..start();
+      final ttsResp = await _dioBinary.get(
+        '/api/v1/test/tts/synthesize',
+        queryParameters: {'text': text},
+      );
+      ttsSw.stop();
+      totalSw.stop();
+      if (!mounted) return; // no setState or playback after dispose
+      final audioBytes = ttsResp.data as List<int>;
+      setState(() {
+        _rtResult += '\nTTS (${ttsSw.elapsedMilliseconds}ms): ${(audioBytes.length / 1024).toStringAsFixed(1)}KB';
+        _rtStatus = '完成！STT=${sttSw.elapsedMilliseconds}ms + TTS=${ttsSw.elapsedMilliseconds}ms = ${totalSw.elapsedMilliseconds}ms';
+      });
+      await _audioPlayer.play(BytesSource(Uint8List.fromList(audioBytes)));
+    } catch (e) {
+      if (mounted) setState(() => _rtStatus = '错误: $e');
+    }
+  }
+
+  /// Lays out the three test cards: TTS, STT and the STT → TTS round-trip.
+  ///
+  /// The STT and round-trip cards share [_recordBody]; they differ only in
+  /// mode, idle icon/label, bound state fields and result font size.
+  @override
+  Widget build(BuildContext context) {
+    return Scaffold(
+      appBar: AppBar(title: const Text('语音 I/O 测试')),
+      body: ListView(
+        padding: const EdgeInsets.all(16),
+        children: [
+          // TTS Section
+          _buildSection(
+            title: 'TTS (文本转语音)',
+            child: _ttsBody(),
+          ),
+          const SizedBox(height: 16),
+
+          // STT Section
+          _buildSection(
+            title: 'STT (语音转文本)',
+            child: _recordBody(
+              mode: 'stt',
+              idleIcon: Icons.mic_none,
+              idleLabel: '长按录音',
+              status: _sttStatus,
+              result: _sttResult,
+              resultFontSize: 16,
+            ),
+          ),
+          const SizedBox(height: 16),
+
+          // Round-trip Section
+          _buildSection(
+            title: 'Round-trip (STT + TTS)',
+            subtitle: '录音 → 识别文本 → 合成语音播放',
+            child: _recordBody(
+              mode: 'rt',
+              idleIcon: Icons.swap_horiz,
+              idleLabel: '长按说话 (Round-trip)',
+              status: _rtStatus,
+              result: _rtResult,
+              resultFontSize: 14,
+            ),
+          ),
+        ],
+      ),
+    );
+  }
+
+  /// Text field, synthesize button and status line of the TTS card.
+  Widget _ttsBody() {
+    return Column(
+      crossAxisAlignment: CrossAxisAlignment.start,
+      children: [
+        TextField(
+          controller: _ttsController,
+          maxLines: 3,
+          decoration: const InputDecoration(
+            hintText: '输入要合成的文本...',
+            border: OutlineInputBorder(),
+          ),
+        ),
+        const SizedBox(height: 8),
+        ElevatedButton.icon(
+          onPressed: _isSynthesizing ? null : _doTTS,
+          icon: _isSynthesizing
+              ? const SizedBox(
+                  width: 16,
+                  height: 16,
+                  child: CircularProgressIndicator(strokeWidth: 2),
+                )
+              : const Icon(Icons.volume_up),
+          label: Text(_isSynthesizing ? '合成中...' : '合成语音'),
+        ),
+        if (_ttsStatus.isNotEmpty) _statusLine(_ttsStatus),
+      ],
+    );
+  }
+
+  /// Hold-to-record button plus status and result for the card in [mode].
+  ///
+  /// [idleIcon] and [idleLabel] are shown while this card is not recording.
+  Widget _recordBody({
+    required String mode,
+    required IconData idleIcon,
+    required String idleLabel,
+    required String status,
+    required String result,
+    required double resultFontSize,
+  }) {
+    // Only the card whose mode started the recording shows the active state.
+    final active = _isRecording && _recordMode == mode;
+    return Column(
+      crossAxisAlignment: CrossAxisAlignment.start,
+      children: [
+        GestureDetector(
+          onLongPressStart: (_) => _startRecording(mode),
+          onLongPressEnd: (_) => _stopRecording(),
+          child: ElevatedButton.icon(
+            // No-op tap handler: recording is driven by the long-press above.
+            onPressed: () {},
+            style: ElevatedButton.styleFrom(
+              backgroundColor: active ? Colors.red : null,
+            ),
+            icon: Icon(active ? Icons.mic : idleIcon),
+            label: Text(active ? '录音中... 松开结束' : idleLabel),
+          ),
+        ),
+        if (status.isNotEmpty) _statusLine(status),
+        if (result.isNotEmpty)
+          Container(
+            width: double.infinity,
+            margin: const EdgeInsets.only(top: 8),
+            padding: const EdgeInsets.all(12),
+            decoration: BoxDecoration(
+              color: Colors.grey[100],
+              borderRadius: BorderRadius.circular(8),
+            ),
+            child: Text(result, style: TextStyle(fontSize: resultFontSize)),
+          ),
+      ],
+    );
+  }
+
+  /// Small grey caption used for every status message.
+  Widget _statusLine(String text) {
+    return Padding(
+      padding: const EdgeInsets.only(top: 8),
+      child: Text(text, style: TextStyle(color: Colors.grey[600], fontSize: 13)),
+    );
+  }
+
+  /// Wraps [child] in a padded [Card] headed by a bold [title]; [subtitle],
+  /// when given, is rendered as a small grey line under the title.
+  Widget _buildSection({
+    required String title,
+    String? subtitle,
+    required Widget child,
+  }) {
+    final heading = Text(title, style: const TextStyle(fontSize: 16, fontWeight: FontWeight.bold));
+    final caption = subtitle == null
+        ? null
+        : Padding(
+            padding: const EdgeInsets.only(top: 4),
+            child: Text(subtitle, style: TextStyle(fontSize: 12, color: Colors.grey[500])),
+          );
+    return Card(
+      child: Padding(
+        padding: const EdgeInsets.all(16),
+        child: Column(
+          crossAxisAlignment: CrossAxisAlignment.start,
+          children: [heading, if (caption != null) caption, const SizedBox(height: 12), child],
+        ),
+      ),
+    );
+  }
+}
diff --git a/it0_app/lib/features/settings/presentation/pages/settings_page.dart b/it0_app/lib/features/settings/presentation/pages/settings_page.dart
index 291cf5b..099d97e 100644
--- a/it0_app/lib/features/settings/presentation/pages/settings_page.dart
+++ b/it0_app/lib/features/settings/presentation/pages/settings_page.dart
@@ -3,6 +3,7 @@ import 'package:flutter_riverpod/flutter_riverpod.dart';
 import 'package:go_router/go_router.dart';
 import '../../../../core/theme/app_colors.dart';
 import '../../../auth/data/providers/auth_provider.dart';
+import '../../../agent_call/presentation/pages/voice_test_page.dart';
 import '../providers/settings_providers.dart';
 
 class SettingsPage extends ConsumerStatefulWidget {
@@ -132,6 +133,25 @@ class _SettingsPageState extends ConsumerState<SettingsPage> {
                 ),
             ],
           ),
+          const SizedBox(height: 24),
+
+          // ===== Dev / Debug Group =====
+          _SettingsGroup(
+            cardColor: cardColor,
+            children: [
+              _SettingsRow(
+                icon: Icons.record_voice_over,
+                iconBg: const Color(0xFF10B981),
+                title: '语音 I/O 测试',
+                trailing: Text('TTS / STT',
+                    style: TextStyle(color: subtitleColor, fontSize: 14)),
+                onTap: () => Navigator.push(
+                  context,
+                  MaterialPageRoute(builder: (_) => const VoiceTestPage()),
+                ),
+              ),
+            ],
+          ),
           const SizedBox(height: 32),
 
           // ===== Logout =====
diff --git a/packages/gateway/config/kong.yml b/packages/gateway/config/kong.yml
index 7e4b8fd..648f9b5 100644
--- a/packages/gateway/config/kong.yml
+++ b/packages/gateway/config/kong.yml
@@ -100,6 +100,10 @@ services:
         paths:
           - /api/v1/twilio
         strip_path: false
+      - name: voice-test
+        paths:
+          - /api/v1/test
+        strip_path: false
 
   - name: audit-service
     url: http://audit-service:3007