NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Context-line indentation and the generic type arguments (Future<String>,
Promise<string>, <String, dynamic>) are presumed -- verify against the target
files before applying. Text above the first "diff --git" line is ignored by
git apply / patch.

Intent: let callers request Whisper language auto-detection. The Flutter client
omits the `language` form field when language == 'auto'; the controller forwards
an empty/missing language as undefined; the STT service omits the field for a
missing language or the literal 'auto' so Whisper auto-detects.

diff --git a/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart b/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart
index 8ddf06c..c9bccb4 100644
--- a/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart
+++ b/it0_app/lib/features/chat/data/datasources/chat_remote_datasource.dart
@@ -154,17 +154,19 @@ class ChatRemoteDatasource {
   /// Backend performs STT, interrupts any running task if needed, and
   /// Transcribe audio to text (STT only, does NOT send to agent).
   /// Returns the transcript string.
+  /// Pass language='auto' to let Whisper auto-detect (best for mixed-language).
   Future<String> transcribeAudio({
     required String audioPath,
     String language = 'zh',
   }) async {
-    final formData = FormData.fromMap({
+    final fields = <String, dynamic>{
       'audio': await MultipartFile.fromFile(
         audioPath,
         filename: audioPath.split('/').last,
       ),
-      'language': language,
-    });
+    };
+    if (language != 'auto') fields['language'] = language;
+    final formData = FormData.fromMap(fields);
     final response = await _dio.post(
       '${ApiEndpoints.agent}/transcribe',
       data: formData,
diff --git a/packages/services/agent-service/src/infrastructure/stt/openai-stt.service.ts b/packages/services/agent-service/src/infrastructure/stt/openai-stt.service.ts
index 2aa9209..7b5c185 100644
--- a/packages/services/agent-service/src/infrastructure/stt/openai-stt.service.ts
+++ b/packages/services/agent-service/src/infrastructure/stt/openai-stt.service.ts
@@ -34,7 +34,7 @@ export class OpenAISttService {
   async transcribe(
     audioBuffer: Buffer,
     filename: string,
-    language = 'zh',
+    language?: string,
   ): Promise<string> {
     const url = `${this.baseUrl}/v1/audio/transcriptions`;
     this.logger.log(
@@ -54,7 +54,7 @@ export class OpenAISttService {
     };
 
     appendField('model', 'whisper-1');
-    appendField('language', language);
+    if (language && language !== 'auto') appendField('language', language); // omit → Whisper auto-detects
     appendField('response_format', 'json');
 
     // File field
diff --git a/packages/services/agent-service/src/interfaces/rest/controllers/agent.controller.ts b/packages/services/agent-service/src/interfaces/rest/controllers/agent.controller.ts
index f16ccb1..a3610de 100644
--- a/packages/services/agent-service/src/interfaces/rest/controllers/agent.controller.ts
+++ b/packages/services/agent-service/src/interfaces/rest/controllers/agent.controller.ts
@@ -421,10 +421,11 @@ export class AgentController {
     if (!file?.buffer?.length) {
       throw new BadRequestException('audio file is required');
     }
+    // language=undefined → Whisper auto-detects (best for mixed-language input)
     const text = await this.sttService.transcribe(
       file.buffer,
       file.originalname || 'audio.m4a',
-      language ?? 'zh',
+      language || undefined,
     );
     return { text: text?.trim() ?? '' };
   }