From ecc64e0ff9e3e5a7ee92983778b5159d2e0b23a1 Mon Sep 17 00:00:00 2001 From: hailin Date: Sat, 7 Mar 2026 00:03:58 -0800 Subject: [PATCH] fix(stt): always use Whisper auto language detection, remove app language hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whisper detects the language from the audio content — a user who speaks Chinese gets Chinese, and a user who speaks English gets English. The app language setting is irrelevant to STT. Co-Authored-By: Claude Sonnet 4.6 --- .../chat/data/datasources/chat_remote_datasource.dart | 10 +++------- .../features/chat/presentation/pages/chat_page.dart | 4 +--- .../chat/presentation/providers/chat_providers.dart | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart b/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart index c9bccb4..02cf55b 100644 --- a/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart +++ b/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart @@ -153,20 +153,16 @@ class ChatRemoteDatasource { /// Uploads an audio file to the voice-message endpoint. /// Backend performs STT, interrupts any running task if needed, and /// Transcribe audio to text (STT only, does NOT send to agent). - /// Returns the transcript string. - /// Pass language='auto' to let Whisper auto-detect (best for mixed-language). + /// No language hint is sent — Whisper auto-detects from audio content. 
Future transcribeAudio({ required String audioPath, - String language = 'zh', }) async { - final fields = { + final formData = FormData.fromMap({ 'audio': await MultipartFile.fromFile( audioPath, filename: audioPath.split('/').last, ), - }; - if (language != 'auto') fields['language'] = language; - final formData = FormData.fromMap(fields); + }); final response = await _dio.post( '${ApiEndpoints.agent}/transcribe', data: formData, diff --git a/it0_app/lib/features/chat/presentation/pages/chat_page.dart b/it0_app/lib/features/chat/presentation/pages/chat_page.dart index c1b343d..2e9e6fd 100644 --- a/it0_app/lib/features/chat/presentation/pages/chat_page.dart +++ b/it0_app/lib/features/chat/presentation/pages/chat_page.dart @@ -13,7 +13,6 @@ import '../widgets/approval_action_card.dart'; import '../widgets/conversation_drawer.dart'; import '../../../agent_call/presentation/pages/agent_call_page.dart'; import '../widgets/voice_mic_button.dart'; -import '../../../settings/presentation/providers/settings_providers.dart'; // --------------------------------------------------------------------------- // Chat page – Timeline workflow style (inspired by Claude Code VSCode) @@ -62,8 +61,7 @@ class _ChatPageState extends ConsumerState { _messageController.text = '识别中…'; }); try { - final language = ref.read(settingsProvider).language; - final text = await ref.read(chatProvider.notifier).transcribeAudio(audioPath, language: language); + final text = await ref.read(chatProvider.notifier).transcribeAudio(audioPath); if (mounted) { setState(() { _messageController.text = text; diff --git a/it0_app/lib/features/chat/presentation/providers/chat_providers.dart b/it0_app/lib/features/chat/presentation/providers/chat_providers.dart index db97861..347e985 100644 --- a/it0_app/lib/features/chat/presentation/providers/chat_providers.dart +++ b/it0_app/lib/features/chat/presentation/providers/chat_providers.dart @@ -573,9 +573,9 @@ class ChatNotifier extends StateNotifier { } } - Future 
transcribeAudio(String audioPath, {String language = 'zh'}) async { + Future transcribeAudio(String audioPath) async { final datasource = _ref.read(chatRemoteDatasourceProvider); - return datasource.transcribeAudio(audioPath: audioPath, language: language); + return datasource.transcribeAudio(audioPath: audioPath); } Future cancelCurrentTask() async {