feat(stt): support auto language detection for mixed Chinese-English input
- Flutter: language='auto' omits the language field → backend receives none
- Backend: no language field → passes undefined to STT service
- STT service: language=undefined → omits language param from Whisper request
- Whisper auto-detects language per utterance when no hint is provided

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
23675fa5a5
commit
4c7c05eb37
|
|
@ -154,17 +154,19 @@ class ChatRemoteDatasource {
|
||||||
/// Backend performs STT, interrupts any running task if needed, and
|
/// Backend performs STT, interrupts any running task if needed, and
|
||||||
/// Transcribe audio to text (STT only, does NOT send to agent).
|
/// Transcribe audio to text (STT only, does NOT send to agent).
|
||||||
/// Returns the transcript string.
|
/// Returns the transcript string.
|
||||||
|
/// Pass language='auto' to let Whisper auto-detect (best for mixed-language).
|
||||||
Future<String> transcribeAudio({
|
Future<String> transcribeAudio({
|
||||||
required String audioPath,
|
required String audioPath,
|
||||||
String language = 'zh',
|
String language = 'zh',
|
||||||
}) async {
|
}) async {
|
||||||
final formData = FormData.fromMap({
|
final fields = <String, dynamic>{
|
||||||
'audio': await MultipartFile.fromFile(
|
'audio': await MultipartFile.fromFile(
|
||||||
audioPath,
|
audioPath,
|
||||||
filename: audioPath.split('/').last,
|
filename: audioPath.split('/').last,
|
||||||
),
|
),
|
||||||
'language': language,
|
};
|
||||||
});
|
if (language != 'auto') fields['language'] = language;
|
||||||
|
final formData = FormData.fromMap(fields);
|
||||||
final response = await _dio.post(
|
final response = await _dio.post(
|
||||||
'${ApiEndpoints.agent}/transcribe',
|
'${ApiEndpoints.agent}/transcribe',
|
||||||
data: formData,
|
data: formData,
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ export class OpenAISttService {
|
||||||
async transcribe(
|
async transcribe(
|
||||||
audioBuffer: Buffer,
|
audioBuffer: Buffer,
|
||||||
filename: string,
|
filename: string,
|
||||||
language = 'zh',
|
language?: string,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const url = `${this.baseUrl}/v1/audio/transcriptions`;
|
const url = `${this.baseUrl}/v1/audio/transcriptions`;
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
|
|
@ -54,7 +54,7 @@ export class OpenAISttService {
|
||||||
};
|
};
|
||||||
|
|
||||||
appendField('model', 'whisper-1');
|
appendField('model', 'whisper-1');
|
||||||
appendField('language', language);
|
if (language) appendField('language', language); // omit → Whisper auto-detects
|
||||||
appendField('response_format', 'json');
|
appendField('response_format', 'json');
|
||||||
|
|
||||||
// File field
|
// File field
|
||||||
|
|
|
||||||
|
|
@ -421,10 +421,11 @@ export class AgentController {
|
||||||
if (!file?.buffer?.length) {
|
if (!file?.buffer?.length) {
|
||||||
throw new BadRequestException('audio file is required');
|
throw new BadRequestException('audio file is required');
|
||||||
}
|
}
|
||||||
|
// language=undefined → Whisper auto-detects (best for mixed-language input)
|
||||||
const text = await this.sttService.transcribe(
|
const text = await this.sttService.transcribe(
|
||||||
file.buffer,
|
file.buffer,
|
||||||
file.originalname || 'audio.m4a',
|
file.originalname || 'audio.m4a',
|
||||||
language ?? 'zh',
|
language || undefined,
|
||||||
);
|
);
|
||||||
return { text: text?.trim() ?? '' };
|
return { text: text?.trim() ?? '' };
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue