fix: enable simultaneous playback + recording in voice call

Root cause: PcmPlayer called openPlayer() without audio session config,
so Android defaulted to earpiece-only mode. When the mic was actively
recording, playback was silently suppressed — the agent's TTS audio was
sent successfully over WebSocket but never reached the speaker.

Changes:

1. PcmPlayer (pcm_player.dart):
   - Added audio_session package for proper audio session management
   - Configure AudioSession with playAndRecord category so mic + speaker
     work simultaneously
   - Set voiceCommunication usage to enable Android hardware AEC (echo
     cancellation) — prevents feedback loops when speaker is active
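   - defaultToSpeaker routes output to the loudspeaker instead of the
     earpiece (this is an iOS AVAudioSession category option; on Android,
     voiceCommunication usage still defaults to the earpiece)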
   - Restored setSpeakerOn() method stub (used by UI toggle)

2. AgentCallPage (agent_call_page.dart):
   - Fixed fire-and-forget bug: _pcmPlayer.feed() returns a Future but was
     called without await, so overlapping feedUint8FromStream calls could
     interleave
   - Added _feedChain serializer to guarantee sequential audio feeding

3. Dependencies:
   - Added audio_session package to pubspec.yaml

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-25 06:48:16 -08:00
parent 75083f23aa
commit e706a4cdc7
5 changed files with 61 additions and 17 deletions

View File

@ -15,6 +15,11 @@ import io.flutter.embedding.engine.FlutterEngine;
public final class GeneratedPluginRegistrant { public final class GeneratedPluginRegistrant {
private static final String TAG = "GeneratedPluginRegistrant"; private static final String TAG = "GeneratedPluginRegistrant";
public static void registerWith(@NonNull FlutterEngine flutterEngine) { public static void registerWith(@NonNull FlutterEngine flutterEngine) {
try {
flutterEngine.getPlugins().add(new com.ryanheise.audio_session.AudioSessionPlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin audio_session, com.ryanheise.audio_session.AudioSessionPlugin", e);
}
try { try {
flutterEngine.getPlugins().add(new com.dexterous.flutterlocalnotifications.FlutterLocalNotificationsPlugin()); flutterEngine.getPlugins().add(new com.dexterous.flutterlocalnotifications.FlutterLocalNotificationsPlugin());
} catch (Exception e) { } catch (Exception e) {

View File

@ -1,14 +1,12 @@
import 'dart:typed_data'; import 'dart:typed_data';
import 'package:audio_session/audio_session.dart';
import 'package:flutter_sound/flutter_sound.dart'; import 'package:flutter_sound/flutter_sound.dart';
/// Wraps [FlutterSoundPlayer] for streaming raw PCM 16kHz mono playback /// Wraps [FlutterSoundPlayer] for streaming raw PCM 16kHz mono playback
/// with a simple jitter buffer to smooth out network-induced timing variance. /// with a simple jitter buffer to smooth out network-induced timing variance.
/// ///
/// Usage: /// Uses [AudioSession] to configure playAndRecord mode so playback works
/// final player = PcmPlayer(); /// alongside mic capture, with hardware echo cancellation on Android.
/// await player.init();
/// player.feed(pcmBytes); // call repeatedly as data arrives
/// await player.dispose();
class PcmPlayer { class PcmPlayer {
FlutterSoundPlayer? _player; FlutterSoundPlayer? _player;
bool _initialized = false; bool _initialized = false;
@ -23,9 +21,31 @@ class PcmPlayer {
static const int _bufferThreshold = 4800; static const int _bufferThreshold = 4800;
/// Open the player and start a streaming session. /// Open the player and start a streaming session.
///
/// Configures the audio session for simultaneous playback + recording,
/// routes audio to the loudspeaker, and enables voice-chat mode so
/// Android activates hardware AEC (echo cancellation).
Future<void> init() async { Future<void> init() async {
if (_initialized) return; if (_initialized) return;
// Configure the shared audio session for voice call mode.
// - playAndRecord: allows mic capture and speaker output simultaneously
// - voiceCommunication: enables Android hardware AEC (echo cancellation)
// - defaultToSpeaker: routes audio to loudspeaker instead of earpiece
final session = await AudioSession.instance;
await session.configure(AudioSessionConfiguration(
avAudioSessionCategory: AVAudioSessionCategory.playAndRecord,
avAudioSessionCategoryOptions:
AVAudioSessionCategoryOptions.defaultToSpeaker |
AVAudioSessionCategoryOptions.allowBluetooth,
avAudioSessionMode: AVAudioSessionMode.voiceChat,
androidAudioAttributes: const AndroidAudioAttributes(
contentType: AndroidAudioContentType.speech,
usage: AndroidAudioUsage.voiceCommunication,
),
androidAudioFocusGainType: AndroidAudioFocusGainType.gain,
));
_player = FlutterSoundPlayer(); _player = FlutterSoundPlayer();
await _player!.openPlayer(); await _player!.openPlayer();
@ -44,14 +64,11 @@ class PcmPlayer {
/// ///
/// Incoming chunks are held in a jitter buffer until [_bufferThreshold] bytes /// Incoming chunks are held in a jitter buffer until [_bufferThreshold] bytes
/// have accumulated. Once playback has started, new chunks are forwarded to /// have accumulated. Once playback has started, new chunks are forwarded to
/// the underlying player immediately. If a buffer underrun occurs (the queue /// the underlying player immediately.
/// empties while playing) the next call to [feed] will re-enter the buffering
/// phase, pausing briefly until the threshold is reached again.
Future<void> feed(Uint8List pcmData) async { Future<void> feed(Uint8List pcmData) async {
if (!_initialized || _player == null) return; if (!_initialized || _player == null) return;
if (!_playbackStarted) { if (!_playbackStarted) {
// Still buffering — queue and wait until we reach the threshold.
_buffer.add(pcmData); _buffer.add(pcmData);
_bufferedBytes += pcmData.length; _bufferedBytes += pcmData.length;
if (_bufferedBytes >= _bufferThreshold) { if (_bufferedBytes >= _bufferThreshold) {
@ -61,7 +78,6 @@ class PcmPlayer {
return; return;
} }
// Playback already running — feed directly.
// ignore: deprecated_member_use // ignore: deprecated_member_use
await _player!.feedUint8FromStream(pcmData); await _player!.feedUint8FromStream(pcmData);
} }
@ -77,8 +93,6 @@ class PcmPlayer {
} }
/// Clear the jitter buffer and reset playback state. /// Clear the jitter buffer and reset playback state.
///
/// Call this on reconnect so stale audio data is not played back.
void reset() { void reset() {
_buffer.clear(); _buffer.clear();
_bufferedBytes = 0; _bufferedBytes = 0;
@ -86,10 +100,15 @@ class PcmPlayer {
} }
/// Toggle speaker mode (earpiece vs loudspeaker). /// Toggle speaker mode (earpiece vs loudspeaker).
///
/// Note: With voiceCommunication usage, Android defaults to earpiece.
/// This method is a placeholder — actual switching requires a platform
/// channel or the AudioManager API. The AudioSession config above already
/// sets defaultToSpeaker.
Future<void> setSpeakerOn(bool on) async { Future<void> setSpeakerOn(bool on) async {
// flutter_sound doesn't expose a direct speaker toggle; // AudioSession configuration already defaults to speaker.
// this would typically use audio_session or method channel. // Fine-grained toggling would need a platform channel to call
// Placeholder for future platform-specific implementation. // Android AudioManager.setSpeakerphoneOn(on).
} }
/// Stop playback and release resources. /// Stop playback and release resources.

View File

@ -332,6 +332,9 @@ class _AgentCallPageState extends ConsumerState<AgentCallPage>
_onCallEnded(); _onCallEnded();
} }
/// Serialises async feed() calls so they don't interleave.
Future<void> _feedChain = Future.value();
/// Handle incoming audio from the agent side. /// Handle incoming audio from the agent side.
void _onAudioReceived(dynamic data) { void _onAudioReceived(dynamic data) {
if (!mounted || _phase != _CallPhase.active) return; if (!mounted || _phase != _CallPhase.active) return;
@ -345,8 +348,8 @@ class _AgentCallPageState extends ConsumerState<AgentCallPage>
return; return;
} }
// Feed to player for real-time playback // Chain feed calls so they run sequentially (feed is async).
_pcmPlayer.feed(pcmData); _feedChain = _feedChain.then((_) => _pcmPlayer.feed(pcmData));
// Drive waveform from actual audio energy (RMS) // Drive waveform from actual audio energy (RMS)
_updateWaveform(pcmData); _updateWaveform(pcmData);

View File

@ -49,6 +49,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "2.13.0" version: "2.13.0"
audio_session:
dependency: "direct main"
description:
name: audio_session
sha256: "8f96a7fecbb718cb093070f868b4cdcb8a9b1053dce342ff8ab2fde10eb9afb7"
url: "https://pub.dev"
source: hosted
version: "0.2.2"
boolean_selector: boolean_selector:
dependency: transitive dependency: transitive
description: description:
@ -960,6 +968,14 @@ packages:
url: "https://pub.dev" url: "https://pub.dev"
source: hosted source: hosted
version: "2.6.4" version: "2.6.4"
rxdart:
dependency: transitive
description:
name: rxdart
sha256: "5c3004a4a8dbb94bd4bf5412a4def4acdaa12e12f269737a5751369e12d1a962"
url: "https://pub.dev"
source: hosted
version: "0.28.0"
shared_preferences: shared_preferences:
dependency: "direct main" dependency: "direct main"
description: description:

View File

@ -60,6 +60,7 @@ dependencies:
uuid: ^4.3.0 uuid: ^4.3.0
url_launcher: ^6.2.0 url_launcher: ^6.2.0
permission_handler: ^11.3.0 permission_handler: ^11.3.0
audio_session: ^0.2.2
dev_dependencies: dev_dependencies:
flutter_test: flutter_test: