feat: replace speech_to_text with GTCRN ML noise reduction + backend STT
Replace traditional on-device speech_to_text with a modern pipeline: - Record audio via `record` package with hardware noise suppression - Apply GTCRN neural denoising (sherpa-onnx, ICASSP 2024, 48K params) - Trim silence, POST to backend /voice/transcribe (faster-whisper) Changes: - Add /transcribe endpoint to voice-service for audio file upload - Add SpeechEnhancer wrapper for sherpa-onnx GTCRN model (523KB) - Rewrite chat_page.dart voice input: record → denoise → transcribe - Keep NoiseReducer.trimSilence for silence removal only - Upgrade record to v6.2.0, add sherpa_onnx, path_provider Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
39c0d83424
commit
a568558585
|
|
@ -15,16 +15,6 @@ import io.flutter.embedding.engine.FlutterEngine;
|
|||
public final class GeneratedPluginRegistrant {
|
||||
private static final String TAG = "GeneratedPluginRegistrant";
|
||||
public static void registerWith(@NonNull FlutterEngine flutterEngine) {
|
||||
try {
|
||||
flutterEngine.getPlugins().add(new io.flutter.plugins.firebase.core.FlutterFirebaseCorePlugin());
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, "Error registering plugin firebase_core, io.flutter.plugins.firebase.core.FlutterFirebaseCorePlugin", e);
|
||||
}
|
||||
try {
|
||||
flutterEngine.getPlugins().add(new io.flutter.plugins.firebase.messaging.FlutterFirebaseMessagingPlugin());
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, "Error registering plugin firebase_messaging, io.flutter.plugins.firebase.messaging.FlutterFirebaseMessagingPlugin", e);
|
||||
}
|
||||
try {
|
||||
flutterEngine.getPlugins().add(new com.it_nomads.fluttersecurestorage.FlutterSecureStoragePlugin());
|
||||
} catch (Exception e) {
|
||||
|
|
@ -45,16 +35,16 @@ public final class GeneratedPluginRegistrant {
|
|||
} catch (Exception e) {
|
||||
Log.e(TAG, "Error registering plugin permission_handler_android, com.baseflow.permissionhandler.PermissionHandlerPlugin", e);
|
||||
}
|
||||
try {
|
||||
flutterEngine.getPlugins().add(new com.llfbandit.record.RecordPlugin());
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, "Error registering plugin record_android, com.llfbandit.record.RecordPlugin", e);
|
||||
}
|
||||
try {
|
||||
flutterEngine.getPlugins().add(new io.flutter.plugins.sharedpreferences.SharedPreferencesPlugin());
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, "Error registering plugin shared_preferences_android, io.flutter.plugins.sharedpreferences.SharedPreferencesPlugin", e);
|
||||
}
|
||||
try {
|
||||
flutterEngine.getPlugins().add(new com.csdcorp.speech_to_text.SpeechToTextPlugin());
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, "Error registering plugin speech_to_text, com.csdcorp.speech_to_text.SpeechToTextPlugin", e);
|
||||
}
|
||||
try {
|
||||
flutterEngine.getPlugins().add(new io.flutter.plugins.urllauncher.UrlLauncherPlugin());
|
||||
} catch (Exception e) {
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -0,0 +1,67 @@
|
|||
import 'dart:math';
|
||||
import 'dart:typed_data';
|
||||
|
||||
/// Utility for trimming leading/trailing silence from PCM audio.
|
||||
///
|
||||
/// The heavy-lifting noise reduction is handled by [SpeechEnhancer] (GTCRN).
|
||||
/// This class provides a lightweight silence trimmer to reduce data sent
|
||||
/// to the backend STT.
|
||||
class NoiseReducer {
  /// Trim leading and trailing silence from PCM 16-bit little-endian audio.
  ///
  /// [pcm16Data] is interpreted as signed 16-bit little-endian mono samples at
  /// [sampleRate] Hz. Audio is scanned in 20 ms frames; a frame whose RMS
  /// amplitude falls below [silenceThresholdDb] (dBFS, relative to full scale)
  /// is considered silent. A [marginMs] buffer is kept around the detected
  /// speech boundaries so word onsets/offsets are not clipped.
  ///
  /// Returns an empty list when no frame exceeds the threshold, and the input
  /// unchanged when it contains fewer than two samples.
  static Uint8List trimSilence(
    Uint8List pcm16Data, {
    int sampleRate = 16000,
    double silenceThresholdDb = -40,
    int marginMs = 200,
  }) {
    if (pcm16Data.length < 4) return pcm16Data;

    final sampleCount = pcm16Data.length ~/ 2;
    final byteData = ByteData.sublistView(pcm16Data);
    // 20 ms analysis frames.
    final frameSize = (sampleRate * 0.020).round();
    final marginSamples = (sampleRate * marginMs / 1000).round();

    // Convert the dBFS threshold to a linear 16-bit amplitude.
    final threshold = 32768 * pow(10, silenceThresholdDb / 20);

    // RMS of the (possibly partial) frame starting at [start].
    double frameRms(int start) {
      final end = min(start + frameSize, sampleCount);
      double sumSquares = 0;
      for (int j = start; j < end; j++) {
        final s = byteData.getInt16(j * 2, Endian.little).toDouble();
        sumSquares += s * s;
      }
      return sqrt(sumSquares / (end - start));
    }

    // Find the first non-silent frame. The bound is `i < sampleCount` (not
    // `sampleCount - frameSize`) so the trailing partial frame — and audio
    // shorter than a single frame — is still examined instead of being
    // silently dropped.
    int startSample = 0;
    bool foundStart = false;
    for (int i = 0; i < sampleCount; i += frameSize) {
      if (frameRms(i) > threshold) {
        startSample = max(0, i - marginSamples);
        foundStart = true;
        break;
      }
    }
    if (!foundStart) return Uint8List(0);

    // Find the last non-silent frame, scanning backwards.
    int endSample = sampleCount;
    for (int i = sampleCount - frameSize; i >= 0; i -= frameSize) {
      if (frameRms(i) > threshold) {
        endSample = min(sampleCount, i + frameSize + marginSamples);
        break;
      }
    }

    if (startSample >= endSample) return Uint8List(0);
    return Uint8List.sublistView(pcm16Data, startSample * 2, endSample * 2);
  }
}
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
import 'dart:io';
|
||||
import 'dart:typed_data';
|
||||
import 'package:flutter/services.dart' show rootBundle;
|
||||
import 'package:path/path.dart' as p;
|
||||
import 'package:path_provider/path_provider.dart';
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa;
|
||||
|
||||
/// Wraps sherpa-onnx's GTCRN model for speech enhancement (noise reduction).
|
||||
///
|
||||
/// Uses the GTCRN model (ICASSP 2024, ~48K params, 523KB) which provides
|
||||
/// lightweight ML-based denoising without over-processing — ideal for
|
||||
/// preprocessing audio before sending to backend STT (faster-whisper).
|
||||
///
|
||||
/// Usage:
|
||||
/// final enhancer = SpeechEnhancer();
|
||||
/// await enhancer.init();
|
||||
/// final denoised = enhancer.enhance(pcm16Bytes);
|
||||
/// enhancer.dispose();
|
||||
class SpeechEnhancer {
  static const String _modelAsset = 'assets/gtcrn_simple.onnx';

  sherpa.OfflineSpeechDenoiser? _denoiser;
  bool _initialized = false;
  String? _modelPath;

  /// Load the GTCRN denoiser; must complete before [enhance] does any work.
  ///
  /// On first run the model file is copied out of the Flutter asset bundle to
  /// the filesystem (sherpa-onnx loads models by path). Subsequent calls are
  /// no-ops.
  Future<void> init() async {
    if (_initialized) return;

    _modelPath = await _copyAssetToFile(_modelAsset);

    final config = sherpa.OfflineSpeechDenoiserConfig(
      model: sherpa.OfflineSpeechDenoiserModelConfig(
        gtcrn: sherpa.OfflineSpeechDenoiserGtcrnModelConfig(
          model: _modelPath!,
        ),
        numThreads: 1,
        debug: false,
        provider: 'cpu',
      ),
    );

    _denoiser = sherpa.OfflineSpeechDenoiser(config);
    _initialized = true;
  }

  /// Denoise PCM 16-bit signed little-endian mono audio at 16 kHz and
  /// return denoised PCM bytes in the same format.
  ///
  /// Acts as a pass-through when [init] has not completed yet, or when the
  /// input is shorter than 640 bytes.
  Uint8List enhance(Uint8List pcm16Bytes) {
    // Copy to a local so the nullable field promotes to non-null.
    final denoiser = _denoiser;
    // 640 bytes = 320 samples = 20 ms at 16 kHz — presumably the minimum
    // useful chunk for the model; shorter input is returned untouched.
    if (denoiser == null || pcm16Bytes.length < 640) return pcm16Bytes;

    // Int16 PCM -> Float32 normalized to [-1.0, 1.0).
    final sampleCount = pcm16Bytes.length ~/ 2;
    final input = Float32List(sampleCount);
    final inView = ByteData.sublistView(pcm16Bytes);
    for (int i = 0; i < sampleCount; i++) {
      input[i] = inView.getInt16(i * 2, Endian.little) / 32768.0;
    }

    // Run the GTCRN denoiser.
    final result = denoiser.run(samples: input, sampleRate: 16000);

    // Float32 -> Int16 PCM bytes, clamped into the signed 16-bit range.
    final outCount = result.samples.length;
    final output = Uint8List(outCount * 2);
    final outView = ByteData.sublistView(output);
    for (int i = 0; i < outCount; i++) {
      final sample = (result.samples[i] * 32768.0).round().clamp(-32768, 32767);
      outView.setInt16(i * 2, sample, Endian.little);
    }
    return output;
  }

  /// Release the native denoiser. Safe to call repeatedly; [init] may be
  /// called again afterwards.
  void dispose() {
    _denoiser?.free();
    _denoiser = null;
    _initialized = false;
  }

  /// Copy a Flutter asset into the app support directory, reusing a previous
  /// copy if one already exists. Returns the on-disk path.
  static Future<String> _copyAssetToFile(String assetPath) async {
    final supportDir = await getApplicationSupportDirectory();
    final targetPath = p.join(supportDir.path, p.basename(assetPath));
    final target = File(targetPath);

    if (!await target.exists()) {
      final asset = await rootBundle.load(assetPath);
      final bytes =
          asset.buffer.asUint8List(asset.offsetInBytes, asset.lengthInBytes);
      await target.writeAsBytes(bytes);
    }
    return targetPath;
  }
}
|
||||
|
|
@ -38,6 +38,9 @@ class ApiEndpoints {
|
|||
static const String contacts = '$comm/contacts';
|
||||
static const String messages = '$comm/messages';
|
||||
|
||||
// Voice
|
||||
static const String transcribe = '$voice/transcribe';
|
||||
|
||||
// WebSocket
|
||||
static const String wsTerminal = '/ws/terminal';
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import 'dart:async';
|
||||
import 'dart:typed_data';
|
||||
import 'package:dio/dio.dart' show FormData, MultipartFile;
|
||||
import 'package:flutter/material.dart';
|
||||
import 'package:flutter_riverpod/flutter_riverpod.dart';
|
||||
import 'package:speech_to_text/speech_to_text.dart' as stt;
|
||||
import 'package:record/record.dart';
|
||||
import '../../../../core/audio/noise_reducer.dart';
|
||||
import '../../../../core/audio/speech_enhancer.dart';
|
||||
import '../../../../core/config/api_endpoints.dart';
|
||||
import '../../../../core/network/dio_client.dart';
|
||||
import '../../../../core/network/websocket_client.dart';
|
||||
|
|
@ -207,59 +211,101 @@ class _ChatPageState extends ConsumerState<ChatPage> with SingleTickerProviderSt
|
|||
final _messageController = TextEditingController();
|
||||
final _scrollController = ScrollController();
|
||||
|
||||
// -- Voice input (TODO 40) ------------------------------------------------
|
||||
late final stt.SpeechToText _speech;
|
||||
bool _speechAvailable = false;
|
||||
// -- Voice input (record + GTCRN denoise + backend STT) -------------------
|
||||
late final AudioRecorder _recorder;
|
||||
final SpeechEnhancer _enhancer = SpeechEnhancer();
|
||||
bool _isListening = false;
|
||||
bool _isTranscribing = false;
|
||||
List<List<int>> _audioChunks = [];
|
||||
StreamSubscription<List<int>>? _audioSubscription;
|
||||
late AnimationController _micPulseController;
|
||||
|
||||
@override
|
||||
void initState() {
|
||||
super.initState();
|
||||
_speech = stt.SpeechToText();
|
||||
_initSpeech();
|
||||
_recorder = AudioRecorder();
|
||||
_enhancer.init(); // load GTCRN model in background
|
||||
_micPulseController = AnimationController(
|
||||
vsync: this,
|
||||
duration: const Duration(milliseconds: 800),
|
||||
);
|
||||
}
|
||||
|
||||
Future<void> _initSpeech() async {
|
||||
_speechAvailable = await _speech.initialize(
|
||||
onStatus: (status) {
|
||||
if (status == 'done' || status == 'notListening') {
|
||||
_stopListening(autoSubmit: true);
|
||||
}
|
||||
},
|
||||
onError: (_) => _stopListening(),
|
||||
);
|
||||
if (mounted) setState(() {});
|
||||
}
|
||||
Future<void> _startListening() async {
|
||||
final hasPermission = await _recorder.hasPermission();
|
||||
if (!hasPermission || !mounted) return;
|
||||
|
||||
void _startListening() {
|
||||
if (!_speechAvailable) return;
|
||||
setState(() => _isListening = true);
|
||||
_micPulseController.repeat(reverse: true);
|
||||
_speech.listen(
|
||||
onResult: (result) {
|
||||
_messageController.text = result.recognizedWords;
|
||||
if (result.finalResult) {
|
||||
_stopListening(autoSubmit: true);
|
||||
}
|
||||
},
|
||||
listenFor: const Duration(seconds: 30),
|
||||
pauseFor: const Duration(seconds: 3),
|
||||
);
|
||||
_audioChunks = [];
|
||||
|
||||
// Stream raw PCM 16kHz mono with platform noise suppression + AGC
|
||||
final stream = await _recorder.startStream(const RecordConfig(
|
||||
encoder: AudioEncoder.pcm16bits,
|
||||
sampleRate: 16000,
|
||||
numChannels: 1,
|
||||
noiseSuppress: true,
|
||||
autoGain: true,
|
||||
));
|
||||
|
||||
_audioSubscription = stream.listen((data) {
|
||||
_audioChunks.add(data);
|
||||
});
|
||||
}
|
||||
|
||||
void _stopListening({bool autoSubmit = false}) {
|
||||
_speech.stop();
|
||||
Future<void> _stopListening({bool autoSubmit = false}) async {
|
||||
if (!_isListening) return;
|
||||
|
||||
// Stop recording and stream
|
||||
await _recorder.stop();
|
||||
await _audioSubscription?.cancel();
|
||||
_audioSubscription = null;
|
||||
_micPulseController.stop();
|
||||
_micPulseController.reset();
|
||||
|
||||
if (!mounted) return;
|
||||
setState(() => _isListening = false);
|
||||
if (autoSubmit && _messageController.text.trim().isNotEmpty) {
|
||||
_send();
|
||||
|
||||
if (!autoSubmit || _audioChunks.isEmpty) return;
|
||||
|
||||
// Transcribe via backend
|
||||
setState(() => _isTranscribing = true);
|
||||
|
||||
try {
|
||||
// Combine recorded chunks into a single PCM buffer
|
||||
final allBytes = _audioChunks.expand((c) => c).toList();
|
||||
final pcmData = Uint8List.fromList(allBytes);
|
||||
_audioChunks = [];
|
||||
|
||||
// GTCRN ML denoise (light) + trim leading/trailing silence
|
||||
final denoised = _enhancer.enhance(pcmData);
|
||||
final trimmed = NoiseReducer.trimSilence(denoised);
|
||||
|
||||
if (trimmed.isEmpty) {
|
||||
if (mounted) setState(() => _isTranscribing = false);
|
||||
return;
|
||||
}
|
||||
|
||||
// POST to backend /voice/transcribe
|
||||
final dio = ref.read(dioClientProvider);
|
||||
final formData = FormData.fromMap({
|
||||
'audio': MultipartFile.fromBytes(trimmed, filename: 'audio.pcm'),
|
||||
});
|
||||
final response = await dio.post(
|
||||
ApiEndpoints.transcribe,
|
||||
data: formData,
|
||||
);
|
||||
|
||||
final text =
|
||||
(response.data as Map<String, dynamic>)['text'] as String? ?? '';
|
||||
if (text.isNotEmpty && mounted) {
|
||||
_messageController.text = text;
|
||||
_send();
|
||||
}
|
||||
} catch (_) {
|
||||
// Voice failed silently – user can still type
|
||||
} finally {
|
||||
if (mounted) setState(() => _isTranscribing = false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -383,28 +429,42 @@ class _ChatPageState extends ConsumerState<ChatPage> with SingleTickerProviderSt
|
|||
),
|
||||
),
|
||||
|
||||
// Voice listening indicator
|
||||
if (_isListening)
|
||||
// Voice listening / transcribing indicator
|
||||
if (_isListening || _isTranscribing)
|
||||
Container(
|
||||
padding: const EdgeInsets.symmetric(vertical: 8, horizontal: 16),
|
||||
color: AppColors.error.withOpacity(0.1),
|
||||
color: (_isListening ? AppColors.error : AppColors.primary)
|
||||
.withOpacity(0.1),
|
||||
child: Row(
|
||||
children: [
|
||||
AnimatedBuilder(
|
||||
animation: _micPulseController,
|
||||
builder: (context, _) => Icon(
|
||||
Icons.mic,
|
||||
color: AppColors.error,
|
||||
size: 20 + (_micPulseController.value * 4),
|
||||
if (_isListening)
|
||||
AnimatedBuilder(
|
||||
animation: _micPulseController,
|
||||
builder: (context, _) => Icon(
|
||||
Icons.mic,
|
||||
color: AppColors.error,
|
||||
size: 20 + (_micPulseController.value * 4),
|
||||
),
|
||||
)
|
||||
else
|
||||
const SizedBox(
|
||||
width: 20,
|
||||
height: 20,
|
||||
child: CircularProgressIndicator(strokeWidth: 2),
|
||||
),
|
||||
const SizedBox(width: 8),
|
||||
Text(
|
||||
_isListening ? 'Listening...' : 'Transcribing...',
|
||||
style: TextStyle(
|
||||
color: _isListening ? AppColors.error : AppColors.primary,
|
||||
),
|
||||
),
|
||||
const SizedBox(width: 8),
|
||||
const Text('Listening...', style: TextStyle(color: AppColors.error)),
|
||||
const Spacer(),
|
||||
TextButton(
|
||||
onPressed: () => _stopListening(),
|
||||
child: const Text('Cancel'),
|
||||
),
|
||||
if (_isListening)
|
||||
TextButton(
|
||||
onPressed: () => _stopListening(),
|
||||
child: const Text('Cancel'),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
|
|
@ -459,7 +519,9 @@ class _ChatPageState extends ConsumerState<ChatPage> with SingleTickerProviderSt
|
|||
_messageController.dispose();
|
||||
_scrollController.dispose();
|
||||
_micPulseController.dispose();
|
||||
_speech.stop();
|
||||
_audioSubscription?.cancel();
|
||||
_recorder.dispose();
|
||||
_enhancer.dispose();
|
||||
super.dispose();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,14 +9,6 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "85.0.0"
|
||||
_flutterfire_internals:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: _flutterfire_internals
|
||||
sha256: "37a42d06068e2fe3deddb2da079a8c4d105f241225ba27b7122b37e9865fd8f7"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.3.35"
|
||||
analyzer:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -177,6 +169,14 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "3.0.7"
|
||||
cupertino_icons:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: cupertino_icons
|
||||
sha256: ba631d1c7f7bef6b729a622b7b752645a2d076dba9976925b8f25725a30e1ee6
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.0.8"
|
||||
custom_lint_core:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -249,54 +249,6 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "7.0.1"
|
||||
firebase_core:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: firebase_core
|
||||
sha256: "26de145bb9688a90962faec6f838247377b0b0d32cc0abecd9a4e43525fc856c"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.32.0"
|
||||
firebase_core_platform_interface:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: firebase_core_platform_interface
|
||||
sha256: "8bcfad6d7033f5ea951d15b867622a824b13812178bfec0c779b9d81de011bbb"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "5.4.2"
|
||||
firebase_core_web:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: firebase_core_web
|
||||
sha256: "362e52457ed2b7b180964769c1e04d1e0ea0259fdf7025fdfedd019d4ae2bd88"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.17.5"
|
||||
firebase_messaging:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: firebase_messaging
|
||||
sha256: a1662cc95d9750a324ad9df349b873360af6f11414902021f130c68ec02267c4
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "14.9.4"
|
||||
firebase_messaging_platform_interface:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: firebase_messaging_platform_interface
|
||||
sha256: "87c4a922cb6f811cfb7a889bdbb3622702443c52a0271636cbc90d813ceac147"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "4.5.37"
|
||||
firebase_messaging_web:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: firebase_messaging_web
|
||||
sha256: "0d34dca01a7b103ed7f20138bffbb28eb0e61a677bf9e78a028a932e2c7322d5"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "3.8.7"
|
||||
fixnum:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -390,6 +342,14 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "3.1.2"
|
||||
flutter_svg:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: flutter_svg
|
||||
sha256: "87fbd7c534435b6c5d9d98b01e1fd527812b82e68ddd8bd35fc45ed0fa8f0a95"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.2.3"
|
||||
flutter_test:
|
||||
dependency: "direct dev"
|
||||
description: flutter
|
||||
|
|
@ -472,6 +432,14 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.1.0"
|
||||
http:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: http
|
||||
sha256: "87721a4a50b19c7f1d49001e51409bddc46303966ce89a65af4f4e6004896412"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.6.0"
|
||||
http_multi_server:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -633,15 +601,23 @@ packages:
|
|||
source: hosted
|
||||
version: "2.2.0"
|
||||
path:
|
||||
dependency: transitive
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: path
|
||||
sha256: "75cca69d1490965be98c73ceaea117e8a04dd21217b37b292c9ddbec0d955bc5"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.9.1"
|
||||
path_provider:
|
||||
path_parsing:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: path_parsing
|
||||
sha256: "883402936929eac138ee0a45da5b0f2c80f89913e6dc3bf77eb65b84b409c6ca"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.1.0"
|
||||
path_provider:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: path_provider
|
||||
sha256: "50c5dd5b6e1aaf6fb3a78b33f6aa3afca52bf903a8a5298f53101fdaee55bbcd"
|
||||
|
|
@ -688,14 +664,6 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.3.0"
|
||||
pedantic:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: pedantic
|
||||
sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.11.1"
|
||||
permission_handler:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
|
|
@ -744,6 +712,14 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.2.1"
|
||||
petitparser:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: petitparser
|
||||
sha256: "91bd59303e9f769f108f8df05e371341b15d59e995e6806aefab827b58336675"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "7.0.2"
|
||||
platform:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -792,6 +768,70 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "3.2.2"
|
||||
record:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: record
|
||||
sha256: d5b6b334f3ab02460db6544e08583c942dbf23e3504bf1e14fd4cbe3d9409277
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "6.2.0"
|
||||
record_android:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_android
|
||||
sha256: "94783f08403aed33ffb68797bf0715b0812eb852f3c7985644c945faea462ba1"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.5.1"
|
||||
record_ios:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_ios
|
||||
sha256: "8df7c136131bd05efc19256af29b2ba6ccc000ccc2c80d4b6b6d7a8d21a3b5a9"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.2.0"
|
||||
record_linux:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_linux
|
||||
sha256: c31a35cc158cd666fc6395f7f56fc054f31685571684be6b97670a27649ce5c7
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.3.0"
|
||||
record_macos:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_macos
|
||||
sha256: "084902e63fc9c0c224c29203d6c75f0bdf9b6a40536c9d916393c8f4c4256488"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.2.1"
|
||||
record_platform_interface:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_platform_interface
|
||||
sha256: "8a81dbc4e14e1272a285bbfef6c9136d070a47d9b0d1f40aa6193516253ee2f6"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.5.0"
|
||||
record_web:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_web
|
||||
sha256: "7e9846981c1f2d111d86f0ae3309071f5bba8b624d1c977316706f08fc31d16d"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.3.0"
|
||||
record_windows:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: record_windows
|
||||
sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.0.7"
|
||||
riverpod:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -896,6 +936,54 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.0.1"
|
||||
sherpa_onnx:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: sherpa_onnx
|
||||
sha256: "6f14669c62bb0812c3f99adcd4cae8677037106618cc51ce09d285a4c5db828a"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.12.25"
|
||||
sherpa_onnx_android:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: sherpa_onnx_android
|
||||
sha256: f9881cd42347eac0619298186d86f286ce6b74947a27b8506f6729496ebccc5d
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.12.25"
|
||||
sherpa_onnx_ios:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: sherpa_onnx_ios
|
||||
sha256: a9c916340eda3bb24ce4598810fc141469f3d9afd0290390d1cc749044ae919d
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.12.25"
|
||||
sherpa_onnx_linux:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: sherpa_onnx_linux
|
||||
sha256: "82d4664ab6df87a76c12987cb420cbe112133d710f9b2e30c4e83d7ad1e93fb0"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.12.25"
|
||||
sherpa_onnx_macos:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: sherpa_onnx_macos
|
||||
sha256: "453fa9a6fdff47d4e8aeef5f9c3ed64327e14769401a16b36213b7a3a3b8aae0"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.12.25"
|
||||
sherpa_onnx_windows:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: sherpa_onnx_windows
|
||||
sha256: c70446773ddab00e8f78b415fe1a580723d49c1f78ad7ce751183620b35c1ffd
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.12.25"
|
||||
sky_engine:
|
||||
dependency: transitive
|
||||
description: flutter
|
||||
|
|
@ -925,30 +1013,6 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.10.2"
|
||||
speech_to_text:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: speech_to_text
|
||||
sha256: "57fef1d41bdebe298e84842c89bb4ac91f31cdbec7830c8cb1fc6b91d03abd42"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "6.6.0"
|
||||
speech_to_text_macos:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: speech_to_text_macos
|
||||
sha256: e685750f7542fcaa087a5396ee471e727ec648bf681f4da83c84d086322173f6
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.1.0"
|
||||
speech_to_text_platform_interface:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: speech_to_text_platform_interface
|
||||
sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.3.0"
|
||||
stack_trace:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -1093,6 +1157,30 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "4.5.2"
|
||||
vector_graphics:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: vector_graphics
|
||||
sha256: a4f059dc26fc8295b5921376600a194c4ec7d55e72f2fe4c7d2831e103d461e6
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.1.19"
|
||||
vector_graphics_codec:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: vector_graphics_codec
|
||||
sha256: "99fd9fbd34d9f9a32efd7b6a6aae14125d8237b10403b422a6a6dfeac2806146"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.1.13"
|
||||
vector_graphics_compiler:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: vector_graphics_compiler
|
||||
sha256: "5a88dd14c0954a5398af544651c7fb51b457a2a556949bfb25369b210ef73a74"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.2.0"
|
||||
vector_math:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -1121,18 +1209,18 @@ packages:
|
|||
dependency: transitive
|
||||
description:
|
||||
name: web
|
||||
sha256: "97da13628db363c635202ad97068d47c5b8aa555808e7a9411963c533b449b27"
|
||||
sha256: "868d88a33d8a87b18ffc05f9f030ba328ffefba92d6c127917a2ba740f9cfe4a"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "0.5.1"
|
||||
version: "1.1.1"
|
||||
web_socket_channel:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
name: web_socket_channel
|
||||
sha256: "58c6666b342a38816b2e7e50ed0f1e261959630becd4c879c4f26bfa14aa5a42"
|
||||
sha256: d88238e5eac9a42bb43ca4e721edba3c08c6354d4a53063afaa568516217621b
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "2.4.5"
|
||||
version: "2.4.0"
|
||||
win32:
|
||||
dependency: transitive
|
||||
description:
|
||||
|
|
@ -1149,6 +1237,14 @@ packages:
|
|||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "1.1.0"
|
||||
xml:
|
||||
dependency: transitive
|
||||
description:
|
||||
name: xml
|
||||
sha256: "971043b3a0d3da28727e40ed3e0b5d18b742fa5a68665cca88e74b7876d5e025"
|
||||
url: "https://pub.dev"
|
||||
source: hosted
|
||||
version: "6.6.1"
|
||||
xterm:
|
||||
dependency: "direct main"
|
||||
description:
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ environment:
|
|||
dependencies:
|
||||
flutter:
|
||||
sdk: flutter
|
||||
cupertino_icons: ^1.0.8
|
||||
|
||||
# State Management
|
||||
flutter_riverpod: ^2.5.0
|
||||
|
|
@ -36,13 +37,14 @@ dependencies:
|
|||
flutter_markdown: ^0.7.0
|
||||
flutter_svg: ^2.0.10+1
|
||||
|
||||
# Push Notifications
|
||||
firebase_core: ^2.27.0
|
||||
firebase_messaging: ^14.7.0
|
||||
|
||||
# Voice
|
||||
speech_to_text: ^6.6.0
|
||||
record: ^6.0.0
|
||||
flutter_tts: ^4.0.0
|
||||
sherpa_onnx: ^1.12.25
|
||||
|
||||
# File paths
|
||||
path_provider: ^2.1.0
|
||||
path: ^1.9.0
|
||||
|
||||
# Terminal
|
||||
xterm: ^4.0.0
|
||||
|
|
@ -70,11 +72,4 @@ flutter:
|
|||
assets:
|
||||
- assets/images/
|
||||
- assets/icons/
|
||||
- assets/animations/
|
||||
|
||||
fonts:
|
||||
- family: Roboto
|
||||
fonts:
|
||||
- asset: assets/fonts/Roboto-Regular.ttf
|
||||
- asset: assets/fonts/Roboto-Bold.ttf
|
||||
weight: 700
|
||||
- assets/gtcrn_simple.onnx
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
import asyncio
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Request
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Request, UploadFile, File
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
|
||||
|
|
@ -151,3 +152,18 @@ async def voice_websocket(websocket: WebSocket, session_id: str):
|
|||
await websocket.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@router.post("/transcribe")
async def transcribe_audio(req: Request, audio: UploadFile = File(...)):
    """Run Whisper STT on an uploaded audio file.

    Expects PCM 16 kHz 16-bit mono audio in the ``audio`` form field and
    returns ``{"text": ...}``. Responds 503 while the STT model is not
    available; an empty upload yields an empty transcript.
    """
    stt = getattr(req.app.state, "stt", None)
    # NOTE(review): readiness is detected via the private ``_model`` attribute
    # of the STT wrapper — confirm there is no public "is loaded" accessor.
    if stt is None or stt._model is None:
        return JSONResponse(status_code=503, content={"error": "STT model not loaded"})

    payload = await audio.read()
    if not payload:
        # Nothing to transcribe; return a successful-but-empty result.
        return {"text": ""}

    transcript = await stt.transcribe(payload)
    return {"text": transcript.strip()}
|
||||
|
|
|
|||
Loading…
Reference in New Issue