diff --git a/it0_app/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java b/it0_app/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java index 808967e..717c5fe 100644 --- a/it0_app/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java +++ b/it0_app/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java @@ -15,16 +15,6 @@ import io.flutter.embedding.engine.FlutterEngine; public final class GeneratedPluginRegistrant { private static final String TAG = "GeneratedPluginRegistrant"; public static void registerWith(@NonNull FlutterEngine flutterEngine) { - try { - flutterEngine.getPlugins().add(new io.flutter.plugins.firebase.core.FlutterFirebaseCorePlugin()); - } catch (Exception e) { - Log.e(TAG, "Error registering plugin firebase_core, io.flutter.plugins.firebase.core.FlutterFirebaseCorePlugin", e); - } - try { - flutterEngine.getPlugins().add(new io.flutter.plugins.firebase.messaging.FlutterFirebaseMessagingPlugin()); - } catch (Exception e) { - Log.e(TAG, "Error registering plugin firebase_messaging, io.flutter.plugins.firebase.messaging.FlutterFirebaseMessagingPlugin", e); - } try { flutterEngine.getPlugins().add(new com.it_nomads.fluttersecurestorage.FlutterSecureStoragePlugin()); } catch (Exception e) { @@ -45,16 +35,16 @@ public final class GeneratedPluginRegistrant { } catch (Exception e) { Log.e(TAG, "Error registering plugin permission_handler_android, com.baseflow.permissionhandler.PermissionHandlerPlugin", e); } + try { + flutterEngine.getPlugins().add(new com.llfbandit.record.RecordPlugin()); + } catch (Exception e) { + Log.e(TAG, "Error registering plugin record_android, com.llfbandit.record.RecordPlugin", e); + } try { flutterEngine.getPlugins().add(new io.flutter.plugins.sharedpreferences.SharedPreferencesPlugin()); } catch (Exception e) { Log.e(TAG, "Error registering plugin shared_preferences_android, io.flutter.plugins.sharedpreferences.SharedPreferencesPlugin", 
e); } - try { - flutterEngine.getPlugins().add(new com.csdcorp.speech_to_text.SpeechToTextPlugin()); - } catch (Exception e) { - Log.e(TAG, "Error registering plugin speech_to_text, com.csdcorp.speech_to_text.SpeechToTextPlugin", e); - } try { flutterEngine.getPlugins().add(new io.flutter.plugins.urllauncher.UrlLauncherPlugin()); } catch (Exception e) { diff --git a/it0_app/assets/gtcrn_simple.onnx b/it0_app/assets/gtcrn_simple.onnx new file mode 100644 index 0000000..99012f5 Binary files /dev/null and b/it0_app/assets/gtcrn_simple.onnx differ diff --git a/it0_app/lib/core/audio/noise_reducer.dart b/it0_app/lib/core/audio/noise_reducer.dart new file mode 100644 index 0000000..053cfe3 --- /dev/null +++ b/it0_app/lib/core/audio/noise_reducer.dart @@ -0,0 +1,67 @@ +import 'dart:math'; +import 'dart:typed_data'; + +/// Utility for trimming leading/trailing silence from PCM audio. +/// +/// The heavy-lifting noise reduction is handled by [SpeechEnhancer] (GTCRN). +/// This class provides a lightweight silence trimmer to reduce data sent +/// to the backend STT. +class NoiseReducer { + /// Trim leading and trailing silence from PCM audio. + /// + /// Keeps a [marginMs] buffer around detected speech boundaries. 
+  ///
+  /// [pcm16Data] is read as 16-bit signed little-endian samples and analysed
+  /// in 20 ms frames; a frame counts as speech when its RMS amplitude exceeds
+  /// [silenceThresholdDb] (dB relative to full scale). Every frame is
+  /// inspected, including a short final one, so a clip that is only one frame
+  /// long (or whose speech sits in its last frame) is no longer discarded.
+  ///
+  /// Returns a view into [pcm16Data] (no copy), or an empty list when no
+  /// frame is above the threshold. Buffers shorter than 4 bytes are returned
+  /// unchanged.
+  static Uint8List trimSilence(
+    Uint8List pcm16Data, {
+    int sampleRate = 16000,
+    double silenceThresholdDb = -40,
+    int marginMs = 200,
+  }) {
+    if (pcm16Data.length < 4) return pcm16Data;
+
+    final sampleCount = pcm16Data.length ~/ 2;
+    final byteData = ByteData.sublistView(pcm16Data);
+    // 20 ms frames. Clamp to 1 so a tiny sampleRate cannot produce a zero
+    // step, which would make the scan loops below spin forever.
+    final frameSize = max(1, (sampleRate * 0.020).round());
+    final marginSamples = (sampleRate * marginMs / 1000).round();
+
+    // Convert dB threshold to amplitude
+    final threshold = 32768 * pow(10, silenceThresholdDb / 20);
+
+    // RMS amplitude of the frame starting at [frameStart]; the frame is cut
+    // short at the end of the buffer.
+    double frameRms(int frameStart) {
+      final frameEnd = min(frameStart + frameSize, sampleCount);
+      double sumOfSquares = 0;
+      for (int j = frameStart; j < frameEnd; j++) {
+        final s = byteData.getInt16(j * 2, Endian.little).toDouble();
+        sumOfSquares += s * s;
+      }
+      return sqrt(sumOfSquares / (frameEnd - frameStart));
+    }
+
+    // Find first non-silent frame. The bound is sampleCount (not
+    // sampleCount - frameSize) so the last, possibly partial, frame is
+    // checked too.
+    int startSample = 0;
+    bool foundStart = false;
+    for (int i = 0; i < sampleCount; i += frameSize) {
+      if (frameRms(i) > threshold) {
+        startSample = max(0, i - marginSamples);
+        foundStart = true;
+        break;
+      }
+    }
+    if (!foundStart) return Uint8List(0);
+
+    // Find last non-silent frame, scanning backwards on an end-aligned grid.
+    // If no frame on that grid is loud, keep everything up to the end.
+    int endSample = sampleCount;
+    for (int i = sampleCount - frameSize; i >= 0; i -= frameSize) {
+      if (frameRms(i) > threshold) {
+        endSample = min(sampleCount, i + frameSize + marginSamples);
+        break;
+      }
+    }
+
+    if (startSample >= endSample) return Uint8List(0);
+    return Uint8List.sublistView(pcm16Data, startSample * 2, endSample * 2);
+  }
+}
diff --git a/it0_app/lib/core/audio/speech_enhancer.dart b/it0_app/lib/core/audio/speech_enhancer.dart
new file mode 100644
index 0000000..0def078
--- /dev/null
+++ b/it0_app/lib/core/audio/speech_enhancer.dart
@@ -0,0 +1,97 @@
+import 'dart:io';
+import 'dart:typed_data';
+import 'package:flutter/services.dart' show rootBundle;
+import 'package:path/path.dart' as p;
+import
'package:path_provider/path_provider.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa;
+
+/// Wraps sherpa-onnx's GTCRN model for speech enhancement (noise reduction).
+///
+/// Uses the GTCRN model (ICASSP 2024, ~48K params, 523KB) which provides
+/// lightweight ML-based denoising without over-processing — ideal for
+/// preprocessing audio before sending to backend STT (faster-whisper).
+///
+/// Usage:
+///   final enhancer = SpeechEnhancer();
+///   await enhancer.init();
+///   final denoised = enhancer.enhance(pcm16Bytes);
+///   enhancer.dispose();
+class SpeechEnhancer {
+  static const String _modelAsset = 'assets/gtcrn_simple.onnx';
+
+  sherpa.OfflineSpeechDenoiser? _denoiser;
+  bool _initialized = false;
+  String? _modelPath;
+
+  // Load currently in progress, shared by concurrent init() callers so the
+  // model is only copied and instantiated once.
+  Future<void>? _initFuture;
+
+  // Incremented by dispose(); a load that started under an older value knows
+  // it was cancelled and must not create a native denoiser.
+  int _generation = 0;
+
+  /// Initialize the GTCRN denoiser. Must be called before [enhance].
+  /// Copies the model from Flutter assets to the filesystem on first run.
+  ///
+  /// Safe to call repeatedly and concurrently: calls made while a load is in
+  /// flight await that same load. If loading fails the error is rethrown and
+  /// a later call starts a fresh attempt.
+  Future<void> init() async {
+    if (_initialized) return;
+
+    final pending = _initFuture ??= _load(_generation);
+    try {
+      await pending;
+    } finally {
+      // Only clear the slot if it still holds this attempt; dispose() or a
+      // newer init() may have replaced it in the meantime.
+      if (identical(_initFuture, pending)) _initFuture = null;
+    }
+  }
+
+  /// Copies the model to disk and creates the denoiser, unless [dispose] ran
+  /// after this load was started (detected via [generation]).
+  Future<void> _load(int generation) async {
+    final modelPath = await _copyAssetToFile(_modelAsset);
+
+    // dispose() was called while the asset was being copied: creating the
+    // denoiser now would leak it, because nobody would free it.
+    if (generation != _generation) return;
+
+    final config = sherpa.OfflineSpeechDenoiserConfig(
+      model: sherpa.OfflineSpeechDenoiserModelConfig(
+        gtcrn: sherpa.OfflineSpeechDenoiserGtcrnModelConfig(
+          model: modelPath,
+        ),
+        numThreads: 1,
+        debug: false,
+        provider: 'cpu',
+      ),
+    );
+
+    _modelPath = modelPath;
+    _denoiser = sherpa.OfflineSpeechDenoiser(config);
+    _initialized = true;
+  }
+
+  /// Denoise PCM 16-bit signed little-endian mono audio at 16 kHz.
+  ///
+  /// Returns denoised PCM bytes in the same format.
+  /// If the enhancer is not initialized, or the input is shorter than 640
+  /// bytes, returns the input unchanged.
+  Uint8List enhance(Uint8List pcm16Bytes) {
+    final denoiser = _denoiser;
+    if (denoiser == null || pcm16Bytes.length < 640) return pcm16Bytes;
+
+    // Convert Int16 PCM → Float32 normalized [-1.0, 1.0]
+    final sampleCount = pcm16Bytes.length ~/ 2;
+    final float32 = Float32List(sampleCount);
+    final byteData = ByteData.sublistView(pcm16Bytes);
+    for (int i = 0; i < sampleCount; i++) {
+      float32[i] = byteData.getInt16(i * 2, Endian.little) / 32768.0;
+    }
+
+    // Run GTCRN denoiser
+    final result = denoiser.run(samples: float32, sampleRate: 16000);
+
+    // Convert Float32 → Int16 PCM bytes
+    final output = Uint8List(result.samples.length * 2);
+    final outView = ByteData.sublistView(output);
+    for (int i = 0; i < result.samples.length; i++) {
+      final sample = result.samples[i];
+      // round() throws on NaN/infinity; emit silence for such a sample
+      // instead of failing the whole buffer.
+      final val = sample.isFinite
+          ? (sample * 32768.0).round().clamp(-32768, 32767).toInt()
+          : 0;
+      outView.setInt16(i * 2, val, Endian.little);
+    }
+    return output;
+  }
+
+  /// Release native resources. Must be called when done.
+  ///
+  /// Also cancels a load that is still in flight, so an [init] that finishes
+  /// copying after this call does not create a denoiser.
+  void dispose() {
+    _generation++;
+    _initFuture = null;
+    _denoiser?.free();
+    _denoiser = null;
+    _initialized = false;
+  }
+
+  /// Copy a Flutter asset to the app support directory (cached).
+  ///
+  /// The cached file is reused only when its size matches the bundled asset,
+  /// so a truncated copy or a model replaced by an app update is rewritten.
+  /// The asset is therefore loaded on every call; it is small and this runs
+  /// once per [init]. The copy goes to a temporary file first and is then
+  /// renamed, so an interrupted write never leaves a partial model at the
+  /// final path.
+  static Future<String> _copyAssetToFile(String assetPath) async {
+    final dir = await getApplicationSupportDirectory();
+    final target = p.join(dir.path, p.basename(assetPath));
+    final file = File(target);
+
+    final data = await rootBundle.load(assetPath);
+    final upToDate =
+        await file.exists() && await file.length() == data.lengthInBytes;
+
+    if (!upToDate) {
+      final temp = File('$target.tmp');
+      await temp.writeAsBytes(
+        data.buffer.asUint8List(data.offsetInBytes, data.lengthInBytes),
+        flush: true,
+      );
+      await temp.rename(target);
+    }
+    return target;
+  }
+}
diff --git a/it0_app/lib/core/config/api_endpoints.dart b/it0_app/lib/core/config/api_endpoints.dart
index bd2e038..3fd8bca 100644
--- a/it0_app/lib/core/config/api_endpoints.dart
+++ b/it0_app/lib/core/config/api_endpoints.dart
@@ -38,6 +38,9 @@ class ApiEndpoints {
   static const String contacts = '$comm/contacts';
   static const String messages = '$comm/messages';
 
+  // Voice
+  static const String transcribe = '$voice/transcribe';
+
   // WebSocket
   static const String wsTerminal = '/ws/terminal';
 }
diff --git a/it0_app/lib/features/chat/presentation/pages/chat_page.dart b/it0_app/lib/features/chat/presentation/pages/chat_page.dart
index 29da58b..47894cf 100644
--- a/it0_app/lib/features/chat/presentation/pages/chat_page.dart
+++ b/it0_app/lib/features/chat/presentation/pages/chat_page.dart
@@ -1,7 +1,11 @@
 import 'dart:async';
+import 'dart:typed_data';
+import 'package:dio/dio.dart' show FormData, MultipartFile;
 import 'package:flutter/material.dart';
 import 'package:flutter_riverpod/flutter_riverpod.dart';
-import 'package:speech_to_text/speech_to_text.dart' as stt;
+import 'package:record/record.dart';
+import '../../../../core/audio/noise_reducer.dart';
+import '../../../../core/audio/speech_enhancer.dart';
 import '../../../../core/config/api_endpoints.dart';
 import '../../../../core/network/dio_client.dart';
 import '../../../../core/network/websocket_client.dart';
@@ -207,59 +211,101 @@ class _ChatPageState extends ConsumerState with SingleTickerProviderSt
   final _messageController = TextEditingController();
   final _scrollController = ScrollController();
 
-  // --
Voice input (TODO 40) ------------------------------------------------
-  late final stt.SpeechToText _speech;
-  bool _speechAvailable = false;
+  // -- Voice input (record + GTCRN denoise + backend STT) -------------------
+  late final AudioRecorder _recorder;
+  final SpeechEnhancer _enhancer = SpeechEnhancer();
   bool _isListening = false;
+  bool _isTranscribing = false;
+  // Raw PCM chunks received from the recorder stream during one session.
+  List<List<int>> _audioChunks = [];
+  StreamSubscription<List<int>>? _audioSubscription;
   late AnimationController _micPulseController;
 
   @override
   void initState() {
     super.initState();
-    _speech = stt.SpeechToText();
-    _initSpeech();
+    _recorder = AudioRecorder();
+    // Load the GTCRN model in the background. enhance() passes audio through
+    // unchanged while no denoiser is available, so a failed load is logged
+    // rather than surfacing as an unhandled async error.
+    _enhancer.init().catchError((Object error) {
+      debugPrint('SpeechEnhancer init failed: $error');
+    });
     _micPulseController = AnimationController(
       vsync: this,
       duration: const Duration(milliseconds: 800),
     );
   }
 
-  Future _initSpeech() async {
-    _speechAvailable = await _speech.initialize(
-      onStatus: (status) {
-        if (status == 'done' || status == 'notListening') {
-          _stopListening(autoSubmit: true);
-        }
-      },
-      onError: (_) => _stopListening(),
-    );
-    if (mounted) setState(() {});
-  }
+  /// Starts capturing microphone audio as raw PCM (16 kHz, mono, 16-bit).
+  ///
+  /// Does nothing when a session is already running, when microphone
+  /// permission is not granted, or when the widget is no longer mounted. If
+  /// the recorder fails to start, the listening indicator is rolled back
+  /// instead of being left on.
+  Future<void> _startListening() async {
+    if (_isListening) return;
+    final hasPermission = await _recorder.hasPermission();
+    // Re-check _isListening: a second tap may have started a session while
+    // the permission check was pending.
+    if (!hasPermission || !mounted || _isListening) return;
 
-  void _startListening() {
-    if (!_speechAvailable) return;
     setState(() => _isListening = true);
     _micPulseController.repeat(reverse: true);
-    _speech.listen(
-      onResult: (result) {
-        _messageController.text = result.recognizedWords;
-        if (result.finalResult) {
-          _stopListening(autoSubmit: true);
-        }
-      },
-      listenFor: const Duration(seconds: 30),
-      pauseFor: const Duration(seconds: 3),
-    );
+    _audioChunks = [];
+
+    try {
+      // Stream raw PCM 16kHz mono with platform noise suppression + AGC
+      final stream = await _recorder.startStream(const RecordConfig(
+        encoder: AudioEncoder.pcm16bits,
+        sampleRate: 16000,
+        numChannels: 1,
+        noiseSuppress: true,
+        autoGain: true,
+      ));
+      // The page may have been disposed while the recorder was starting.
+      if (!mounted) return;
+
+      _audioSubscription = stream.listen(
+        (data) {
+          _audioChunks.add(data);
+        },
+        onError: (Object error) {
+          debugPrint('Audio stream error: $error');
+        },
+      );
+    } catch (error) {
+      debugPrint('Failed to start recording: $error');
+      if (!mounted) return;
+      // Undo the optimistic UI state set above.
+      _micPulseController.stop();
+      _micPulseController.reset();
+      setState(() => _isListening = false);
+    }
   }
 
-  void _stopListening({bool autoSubmit = false}) {
-
_speech.stop();
+  /// Stops the current recording session.
+  ///
+  /// With [autoSubmit] false (the default) the captured audio is discarded.
+  /// With [autoSubmit] true the audio is denoised, silence-trimmed, uploaded
+  /// to [ApiEndpoints.transcribe], and a non-empty transcript is placed in
+  /// the message field and sent. Transcription is best effort: failures are
+  /// logged and otherwise ignored so the user can keep typing.
+  Future<void> _stopListening({bool autoSubmit = false}) async {
+    if (!_isListening) return;
+    // Clear the flag before the first await so a second call (for example a
+    // double tap) cannot run the stop/transcribe sequence twice.
+    _isListening = false;
+
+    // Stop recording and stream
+    try {
+      await _recorder.stop();
+      await _audioSubscription?.cancel();
+    } catch (error) {
+      // Keep going so the UI still leaves the listening state.
+      debugPrint('Failed to stop recording cleanly: $error');
+    }
+    _audioSubscription = null;
+    // Check before touching the animation controller, which dispose() tears
+    // down together with the widget.
+    if (!mounted) return;
     _micPulseController.stop();
     _micPulseController.reset();
     setState(() => _isListening = false);
-    if (autoSubmit && _messageController.text.trim().isNotEmpty) {
-      _send();
+
+    if (!autoSubmit || _audioChunks.isEmpty) {
+      // Cancelled or nothing captured: release the buffered audio.
+      _audioChunks = [];
+      return;
+    }
+
+    // Transcribe via backend
+    setState(() => _isTranscribing = true);
+
+    try {
+      // Combine recorded chunks into a single PCM buffer
+      final builder = BytesBuilder(copy: false);
+      for (final chunk in _audioChunks) {
+        builder.add(chunk);
+      }
+      final pcmData = builder.takeBytes();
+      _audioChunks = [];
+
+      // GTCRN ML denoise (light) + trim leading/trailing silence
+      final denoised = _enhancer.enhance(pcmData);
+      final trimmed = NoiseReducer.trimSilence(denoised);
+
+      // Nothing but silence; the finally block clears the transcribing flag.
+      if (trimmed.isEmpty) return;
+
+      // POST to backend /voice/transcribe
+      final dio = ref.read(dioClientProvider);
+      final formData = FormData.fromMap({
+        'audio': MultipartFile.fromBytes(trimmed, filename: 'audio.pcm'),
+      });
+      final response = await dio.post(
+        ApiEndpoints.transcribe,
+        data: formData,
+      );
+
+      // Treat any response without a string "text" field as "no transcript".
+      final body = response.data;
+      final text =
+          body is Map && body['text'] is String ? body['text'] as String : '';
+      if (text.isNotEmpty && mounted) {
+        _messageController.text = text;
+        _send();
+      }
+    } catch (error) {
+      // Voice input is best effort – the user can still type – but log the
+      // failure so it can be diagnosed.
+      debugPrint('Voice transcription failed: $error');
+    } finally {
+      if (mounted) setState(() => _isTranscribing = false);
     }
   }
@@ -383,28 +429,42 @@ class _ChatPageState extends ConsumerState with SingleTickerProviderSt
           ),
         ),
-        // Voice listening indicator
-        if (_isListening)
+        // Voice listening / transcribing indicator
+        if (_isListening || _isTranscribing)
           Container(
             padding: const EdgeInsets.symmetric(vertical: 8, horizontal: 16),
-            color: AppColors.error.withOpacity(0.1),
+            color: (_isListening ?
AppColors.error : AppColors.primary) + .withOpacity(0.1), child: Row( children: [ - AnimatedBuilder( - animation: _micPulseController, - builder: (context, _) => Icon( - Icons.mic, - color: AppColors.error, - size: 20 + (_micPulseController.value * 4), + if (_isListening) + AnimatedBuilder( + animation: _micPulseController, + builder: (context, _) => Icon( + Icons.mic, + color: AppColors.error, + size: 20 + (_micPulseController.value * 4), + ), + ) + else + const SizedBox( + width: 20, + height: 20, + child: CircularProgressIndicator(strokeWidth: 2), + ), + const SizedBox(width: 8), + Text( + _isListening ? 'Listening...' : 'Transcribing...', + style: TextStyle( + color: _isListening ? AppColors.error : AppColors.primary, ), ), - const SizedBox(width: 8), - const Text('Listening...', style: TextStyle(color: AppColors.error)), const Spacer(), - TextButton( - onPressed: () => _stopListening(), - child: const Text('Cancel'), - ), + if (_isListening) + TextButton( + onPressed: () => _stopListening(), + child: const Text('Cancel'), + ), ], ), ), @@ -459,7 +519,9 @@ class _ChatPageState extends ConsumerState with SingleTickerProviderSt _messageController.dispose(); _scrollController.dispose(); _micPulseController.dispose(); - _speech.stop(); + _audioSubscription?.cancel(); + _recorder.dispose(); + _enhancer.dispose(); super.dispose(); } } diff --git a/it0_app/pubspec.lock b/it0_app/pubspec.lock index 43276a1..748765b 100644 --- a/it0_app/pubspec.lock +++ b/it0_app/pubspec.lock @@ -9,14 +9,6 @@ packages: url: "https://pub.dev" source: hosted version: "85.0.0" - _flutterfire_internals: - dependency: transitive - description: - name: _flutterfire_internals - sha256: "37a42d06068e2fe3deddb2da079a8c4d105f241225ba27b7122b37e9865fd8f7" - url: "https://pub.dev" - source: hosted - version: "1.3.35" analyzer: dependency: transitive description: @@ -177,6 +169,14 @@ packages: url: "https://pub.dev" source: hosted version: "3.0.7" + cupertino_icons: + dependency: "direct main" + 
description: + name: cupertino_icons + sha256: ba631d1c7f7bef6b729a622b7b752645a2d076dba9976925b8f25725a30e1ee6 + url: "https://pub.dev" + source: hosted + version: "1.0.8" custom_lint_core: dependency: transitive description: @@ -249,54 +249,6 @@ packages: url: "https://pub.dev" source: hosted version: "7.0.1" - firebase_core: - dependency: "direct main" - description: - name: firebase_core - sha256: "26de145bb9688a90962faec6f838247377b0b0d32cc0abecd9a4e43525fc856c" - url: "https://pub.dev" - source: hosted - version: "2.32.0" - firebase_core_platform_interface: - dependency: transitive - description: - name: firebase_core_platform_interface - sha256: "8bcfad6d7033f5ea951d15b867622a824b13812178bfec0c779b9d81de011bbb" - url: "https://pub.dev" - source: hosted - version: "5.4.2" - firebase_core_web: - dependency: transitive - description: - name: firebase_core_web - sha256: "362e52457ed2b7b180964769c1e04d1e0ea0259fdf7025fdfedd019d4ae2bd88" - url: "https://pub.dev" - source: hosted - version: "2.17.5" - firebase_messaging: - dependency: "direct main" - description: - name: firebase_messaging - sha256: a1662cc95d9750a324ad9df349b873360af6f11414902021f130c68ec02267c4 - url: "https://pub.dev" - source: hosted - version: "14.9.4" - firebase_messaging_platform_interface: - dependency: transitive - description: - name: firebase_messaging_platform_interface - sha256: "87c4a922cb6f811cfb7a889bdbb3622702443c52a0271636cbc90d813ceac147" - url: "https://pub.dev" - source: hosted - version: "4.5.37" - firebase_messaging_web: - dependency: transitive - description: - name: firebase_messaging_web - sha256: "0d34dca01a7b103ed7f20138bffbb28eb0e61a677bf9e78a028a932e2c7322d5" - url: "https://pub.dev" - source: hosted - version: "3.8.7" fixnum: dependency: transitive description: @@ -390,6 +342,14 @@ packages: url: "https://pub.dev" source: hosted version: "3.1.2" + flutter_svg: + dependency: "direct main" + description: + name: flutter_svg + sha256: 
"87fbd7c534435b6c5d9d98b01e1fd527812b82e68ddd8bd35fc45ed0fa8f0a95" + url: "https://pub.dev" + source: hosted + version: "2.2.3" flutter_test: dependency: "direct dev" description: flutter @@ -472,6 +432,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.0" + http: + dependency: transitive + description: + name: http + sha256: "87721a4a50b19c7f1d49001e51409bddc46303966ce89a65af4f4e6004896412" + url: "https://pub.dev" + source: hosted + version: "1.6.0" http_multi_server: dependency: transitive description: @@ -633,15 +601,23 @@ packages: source: hosted version: "2.2.0" path: - dependency: transitive + dependency: "direct main" description: name: path sha256: "75cca69d1490965be98c73ceaea117e8a04dd21217b37b292c9ddbec0d955bc5" url: "https://pub.dev" source: hosted version: "1.9.1" - path_provider: + path_parsing: dependency: transitive + description: + name: path_parsing + sha256: "883402936929eac138ee0a45da5b0f2c80f89913e6dc3bf77eb65b84b409c6ca" + url: "https://pub.dev" + source: hosted + version: "1.1.0" + path_provider: + dependency: "direct main" description: name: path_provider sha256: "50c5dd5b6e1aaf6fb3a78b33f6aa3afca52bf903a8a5298f53101fdaee55bbcd" @@ -688,14 +664,6 @@ packages: url: "https://pub.dev" source: hosted version: "2.3.0" - pedantic: - dependency: transitive - description: - name: pedantic - sha256: "67fc27ed9639506c856c840ccce7594d0bdcd91bc8d53d6e52359449a1d50602" - url: "https://pub.dev" - source: hosted - version: "1.11.1" permission_handler: dependency: "direct main" description: @@ -744,6 +712,14 @@ packages: url: "https://pub.dev" source: hosted version: "0.2.1" + petitparser: + dependency: transitive + description: + name: petitparser + sha256: "91bd59303e9f769f108f8df05e371341b15d59e995e6806aefab827b58336675" + url: "https://pub.dev" + source: hosted + version: "7.0.2" platform: dependency: transitive description: @@ -792,6 +768,70 @@ packages: url: "https://pub.dev" source: hosted version: "3.2.2" + record: + dependency: 
"direct main" + description: + name: record + sha256: d5b6b334f3ab02460db6544e08583c942dbf23e3504bf1e14fd4cbe3d9409277 + url: "https://pub.dev" + source: hosted + version: "6.2.0" + record_android: + dependency: transitive + description: + name: record_android + sha256: "94783f08403aed33ffb68797bf0715b0812eb852f3c7985644c945faea462ba1" + url: "https://pub.dev" + source: hosted + version: "1.5.1" + record_ios: + dependency: transitive + description: + name: record_ios + sha256: "8df7c136131bd05efc19256af29b2ba6ccc000ccc2c80d4b6b6d7a8d21a3b5a9" + url: "https://pub.dev" + source: hosted + version: "1.2.0" + record_linux: + dependency: transitive + description: + name: record_linux + sha256: c31a35cc158cd666fc6395f7f56fc054f31685571684be6b97670a27649ce5c7 + url: "https://pub.dev" + source: hosted + version: "1.3.0" + record_macos: + dependency: transitive + description: + name: record_macos + sha256: "084902e63fc9c0c224c29203d6c75f0bdf9b6a40536c9d916393c8f4c4256488" + url: "https://pub.dev" + source: hosted + version: "1.2.1" + record_platform_interface: + dependency: transitive + description: + name: record_platform_interface + sha256: "8a81dbc4e14e1272a285bbfef6c9136d070a47d9b0d1f40aa6193516253ee2f6" + url: "https://pub.dev" + source: hosted + version: "1.5.0" + record_web: + dependency: transitive + description: + name: record_web + sha256: "7e9846981c1f2d111d86f0ae3309071f5bba8b624d1c977316706f08fc31d16d" + url: "https://pub.dev" + source: hosted + version: "1.3.0" + record_windows: + dependency: transitive + description: + name: record_windows + sha256: "223258060a1d25c62bae18282c16783f28581ec19401d17e56b5205b9f039d78" + url: "https://pub.dev" + source: hosted + version: "1.0.7" riverpod: dependency: transitive description: @@ -896,6 +936,54 @@ packages: url: "https://pub.dev" source: hosted version: "2.0.1" + sherpa_onnx: + dependency: "direct main" + description: + name: sherpa_onnx + sha256: "6f14669c62bb0812c3f99adcd4cae8677037106618cc51ce09d285a4c5db828a" + 
url: "https://pub.dev" + source: hosted + version: "1.12.25" + sherpa_onnx_android: + dependency: transitive + description: + name: sherpa_onnx_android + sha256: f9881cd42347eac0619298186d86f286ce6b74947a27b8506f6729496ebccc5d + url: "https://pub.dev" + source: hosted + version: "1.12.25" + sherpa_onnx_ios: + dependency: transitive + description: + name: sherpa_onnx_ios + sha256: a9c916340eda3bb24ce4598810fc141469f3d9afd0290390d1cc749044ae919d + url: "https://pub.dev" + source: hosted + version: "1.12.25" + sherpa_onnx_linux: + dependency: transitive + description: + name: sherpa_onnx_linux + sha256: "82d4664ab6df87a76c12987cb420cbe112133d710f9b2e30c4e83d7ad1e93fb0" + url: "https://pub.dev" + source: hosted + version: "1.12.25" + sherpa_onnx_macos: + dependency: transitive + description: + name: sherpa_onnx_macos + sha256: "453fa9a6fdff47d4e8aeef5f9c3ed64327e14769401a16b36213b7a3a3b8aae0" + url: "https://pub.dev" + source: hosted + version: "1.12.25" + sherpa_onnx_windows: + dependency: transitive + description: + name: sherpa_onnx_windows + sha256: c70446773ddab00e8f78b415fe1a580723d49c1f78ad7ce751183620b35c1ffd + url: "https://pub.dev" + source: hosted + version: "1.12.25" sky_engine: dependency: transitive description: flutter @@ -925,30 +1013,6 @@ packages: url: "https://pub.dev" source: hosted version: "1.10.2" - speech_to_text: - dependency: "direct main" - description: - name: speech_to_text - sha256: "57fef1d41bdebe298e84842c89bb4ac91f31cdbec7830c8cb1fc6b91d03abd42" - url: "https://pub.dev" - source: hosted - version: "6.6.0" - speech_to_text_macos: - dependency: transitive - description: - name: speech_to_text_macos - sha256: e685750f7542fcaa087a5396ee471e727ec648bf681f4da83c84d086322173f6 - url: "https://pub.dev" - source: hosted - version: "1.1.0" - speech_to_text_platform_interface: - dependency: transitive - description: - name: speech_to_text_platform_interface - sha256: a1935847704e41ee468aad83181ddd2423d0833abe55d769c59afca07adb5114 - url: 
"https://pub.dev" - source: hosted - version: "2.3.0" stack_trace: dependency: transitive description: @@ -1093,6 +1157,30 @@ packages: url: "https://pub.dev" source: hosted version: "4.5.2" + vector_graphics: + dependency: transitive + description: + name: vector_graphics + sha256: a4f059dc26fc8295b5921376600a194c4ec7d55e72f2fe4c7d2831e103d461e6 + url: "https://pub.dev" + source: hosted + version: "1.1.19" + vector_graphics_codec: + dependency: transitive + description: + name: vector_graphics_codec + sha256: "99fd9fbd34d9f9a32efd7b6a6aae14125d8237b10403b422a6a6dfeac2806146" + url: "https://pub.dev" + source: hosted + version: "1.1.13" + vector_graphics_compiler: + dependency: transitive + description: + name: vector_graphics_compiler + sha256: "5a88dd14c0954a5398af544651c7fb51b457a2a556949bfb25369b210ef73a74" + url: "https://pub.dev" + source: hosted + version: "1.2.0" vector_math: dependency: transitive description: @@ -1121,18 +1209,18 @@ packages: dependency: transitive description: name: web - sha256: "97da13628db363c635202ad97068d47c5b8aa555808e7a9411963c533b449b27" + sha256: "868d88a33d8a87b18ffc05f9f030ba328ffefba92d6c127917a2ba740f9cfe4a" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "1.1.1" web_socket_channel: dependency: "direct main" description: name: web_socket_channel - sha256: "58c6666b342a38816b2e7e50ed0f1e261959630becd4c879c4f26bfa14aa5a42" + sha256: d88238e5eac9a42bb43ca4e721edba3c08c6354d4a53063afaa568516217621b url: "https://pub.dev" source: hosted - version: "2.4.5" + version: "2.4.0" win32: dependency: transitive description: @@ -1149,6 +1237,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.0" + xml: + dependency: transitive + description: + name: xml + sha256: "971043b3a0d3da28727e40ed3e0b5d18b742fa5a68665cca88e74b7876d5e025" + url: "https://pub.dev" + source: hosted + version: "6.6.1" xterm: dependency: "direct main" description: diff --git a/it0_app/pubspec.yaml b/it0_app/pubspec.yaml index 
6af598c..2e3ccb4 100644 --- a/it0_app/pubspec.yaml +++ b/it0_app/pubspec.yaml @@ -9,6 +9,7 @@ environment: dependencies: flutter: sdk: flutter + cupertino_icons: ^1.0.8 # State Management flutter_riverpod: ^2.5.0 @@ -36,13 +37,14 @@ dependencies: flutter_markdown: ^0.7.0 flutter_svg: ^2.0.10+1 - # Push Notifications - firebase_core: ^2.27.0 - firebase_messaging: ^14.7.0 - # Voice - speech_to_text: ^6.6.0 + record: ^6.0.0 flutter_tts: ^4.0.0 + sherpa_onnx: ^1.12.25 + + # File paths + path_provider: ^2.1.0 + path: ^1.9.0 # Terminal xterm: ^4.0.0 @@ -70,11 +72,4 @@ flutter: assets: - assets/images/ - assets/icons/ - - assets/animations/ - - fonts: - - family: Roboto - fonts: - - asset: assets/fonts/Roboto-Regular.ttf - - asset: assets/fonts/Roboto-Bold.ttf - weight: 700 + - assets/gtcrn_simple.onnx diff --git a/packages/services/voice-service/src/api/session_router.py b/packages/services/voice-service/src/api/session_router.py index 74cb7b6..8263c34 100644 --- a/packages/services/voice-service/src/api/session_router.py +++ b/packages/services/voice-service/src/api/session_router.py @@ -1,7 +1,8 @@ import asyncio import uuid -from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Request +from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Request, UploadFile, File +from fastapi.responses import JSONResponse from pydantic import BaseModel from typing import Optional @@ -151,3 +152,18 @@ async def voice_websocket(websocket: WebSocket, session_id: str): await websocket.close() except Exception: pass + + +@router.post("/transcribe") +async def transcribe_audio(req: Request, audio: UploadFile = File(...)): + """Transcribe uploaded audio (PCM 16kHz 16-bit mono) to text using Whisper.""" + stt = getattr(req.app.state, "stt", None) + if stt is None or stt._model is None: + return JSONResponse(status_code=503, content={"error": "STT model not loaded"}) + + audio_data = await audio.read() + if len(audio_data) == 0: + return {"text": ""} + + text = await 
stt.transcribe(audio_data) + return {"text": text.strip()}