fix: enable simultaneous playback + recording in voice call

Root cause: PcmPlayer called openPlayer() without audio session config,
so Android defaulted to earpiece-only mode. When the mic was actively
recording, playback was silently suppressed — the agent's TTS audio was
sent successfully over WebSocket but never reached the speaker.

Changes:

1. PcmPlayer (pcm_player.dart):
   - Added audio_session package for proper audio session management
   - Configure AudioSession with playAndRecord category so mic + speaker
     work simultaneously
   - Set voiceCommunication usage to enable Android hardware AEC (echo
     cancellation) — prevents feedback loops when speaker is active
   - defaultToSpeaker routes output to loudspeaker instead of earpiece
   - Restored setSpeakerOn() method stub (used by UI toggle)

2. AgentCallPage (agent_call_page.dart):
   - Fixed fire-and-forget bug: _pcmPlayer.feed() returns Future but was
     called without await, causing interleaved feedUint8FromStream calls
   - Added _feedChain serializer to guarantee sequential audio feeding

3. Dependencies:
   - Added audio_session package to pubspec.yaml

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-25 06:48:16 -08:00
parent 75083f23aa
commit e706a4cdc7
5 changed files with 61 additions and 17 deletions

View File

@ -15,6 +15,11 @@ import io.flutter.embedding.engine.FlutterEngine;
public final class GeneratedPluginRegistrant {
private static final String TAG = "GeneratedPluginRegistrant";
public static void registerWith(@NonNull FlutterEngine flutterEngine) {
try {
flutterEngine.getPlugins().add(new com.ryanheise.audio_session.AudioSessionPlugin());
} catch (Exception e) {
Log.e(TAG, "Error registering plugin audio_session, com.ryanheise.audio_session.AudioSessionPlugin", e);
}
try {
flutterEngine.getPlugins().add(new com.dexterous.flutterlocalnotifications.FlutterLocalNotificationsPlugin());
} catch (Exception e) {

View File

@ -1,14 +1,12 @@
import 'dart:typed_data';
import 'package:audio_session/audio_session.dart';
import 'package:flutter_sound/flutter_sound.dart';
/// Wraps [FlutterSoundPlayer] for streaming raw PCM 16kHz mono playback
/// with a simple jitter buffer to smooth out network-induced timing variance.
///
/// Usage:
/// final player = PcmPlayer();
/// await player.init();
/// player.feed(pcmBytes); // call repeatedly as data arrives
/// await player.dispose();
/// Uses [AudioSession] to configure playAndRecord mode so playback works
/// alongside mic capture, with hardware echo cancellation on Android.
class PcmPlayer {
FlutterSoundPlayer? _player;
bool _initialized = false;
@ -23,9 +21,31 @@ class PcmPlayer {
static const int _bufferThreshold = 4800;
/// Open the player and start a streaming session.
///
/// Configures the audio session for simultaneous playback + recording,
/// routes audio to the loudspeaker, and enables voice-chat mode so
/// Android activates hardware AEC (echo cancellation).
Future<void> init() async {
if (_initialized) return;
// Configure the shared audio session for voice call mode.
// - playAndRecord: allows mic capture and speaker output simultaneously
// - voiceCommunication: enables Android hardware AEC (echo cancellation)
// - defaultToSpeaker: routes audio to loudspeaker instead of earpiece
final session = await AudioSession.instance;
await session.configure(AudioSessionConfiguration(
avAudioSessionCategory: AVAudioSessionCategory.playAndRecord,
avAudioSessionCategoryOptions:
AVAudioSessionCategoryOptions.defaultToSpeaker |
AVAudioSessionCategoryOptions.allowBluetooth,
avAudioSessionMode: AVAudioSessionMode.voiceChat,
androidAudioAttributes: const AndroidAudioAttributes(
contentType: AndroidAudioContentType.speech,
usage: AndroidAudioUsage.voiceCommunication,
),
androidAudioFocusGainType: AndroidAudioFocusGainType.gain,
));
_player = FlutterSoundPlayer();
await _player!.openPlayer();
@ -44,14 +64,11 @@ class PcmPlayer {
///
/// Incoming chunks are held in a jitter buffer until [_bufferThreshold] bytes
/// have accumulated. Once playback has started, new chunks are forwarded to
/// the underlying player immediately. If a buffer underrun occurs (the queue
/// empties while playing) the next call to [feed] will re-enter the buffering
/// phase, pausing briefly until the threshold is reached again.
/// the underlying player immediately.
Future<void> feed(Uint8List pcmData) async {
if (!_initialized || _player == null) return;
if (!_playbackStarted) {
// Still buffering: queue the chunk and wait until we reach the threshold.
_buffer.add(pcmData);
_bufferedBytes += pcmData.length;
if (_bufferedBytes >= _bufferThreshold) {
@ -61,7 +78,6 @@ class PcmPlayer {
return;
}
// Playback already running; feed directly.
// ignore: deprecated_member_use
await _player!.feedUint8FromStream(pcmData);
}
@ -77,8 +93,6 @@ class PcmPlayer {
}
/// Clear the jitter buffer and reset playback state.
///
/// Call this on reconnect so stale audio data is not played back.
void reset() {
_buffer.clear();
_bufferedBytes = 0;
@ -86,10 +100,15 @@ class PcmPlayer {
}
/// Toggle speaker mode (earpiece vs loudspeaker).
///
/// Currently a no-op stub: the [on] argument is ignored. The AudioSession
/// configuration applied in init() already sets defaultToSpeaker, so output
/// is routed to the loudspeaker by default. Fine-grained runtime switching
/// is not implemented here — it would require a platform channel (e.g.
/// Android AudioManager.setSpeakerphoneOn). NOTE(review): the claim that
/// voiceCommunication usage defaults to earpiece is presumed — confirm on
/// target devices.
Future<void> setSpeakerOn(bool on) async {
// AudioSession configuration already defaults to speaker.
// Fine-grained toggling would need a platform channel to call
// Android AudioManager.setSpeakerphoneOn(on).
}
/// Stop playback and release resources.

View File

@ -332,6 +332,9 @@ class _AgentCallPageState extends ConsumerState<AgentCallPage>
_onCallEnded();
}
/// Serialises async feed() calls so they don't interleave.
Future<void> _feedChain = Future.value();
/// Handle incoming audio from the agent side.
void _onAudioReceived(dynamic data) {
if (!mounted || _phase != _CallPhase.active) return;
@ -345,8 +348,8 @@ class _AgentCallPageState extends ConsumerState<AgentCallPage>
return;
}
// Feed to player for real-time playback
_pcmPlayer.feed(pcmData);
// Chain feed calls so they run sequentially (feed is async).
_feedChain = _feedChain.then((_) => _pcmPlayer.feed(pcmData));
// Drive waveform from actual audio energy (RMS)
_updateWaveform(pcmData);

View File

@ -49,6 +49,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.13.0"
audio_session:
dependency: "direct main"
description:
name: audio_session
sha256: "8f96a7fecbb718cb093070f868b4cdcb8a9b1053dce342ff8ab2fde10eb9afb7"
url: "https://pub.dev"
source: hosted
version: "0.2.2"
boolean_selector:
dependency: transitive
description:
@ -960,6 +968,14 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.6.4"
rxdart:
dependency: transitive
description:
name: rxdart
sha256: "5c3004a4a8dbb94bd4bf5412a4def4acdaa12e12f269737a5751369e12d1a962"
url: "https://pub.dev"
source: hosted
version: "0.28.0"
shared_preferences:
dependency: "direct main"
description:

View File

@ -60,6 +60,7 @@ dependencies:
uuid: ^4.3.0
url_launcher: ^6.2.0
permission_handler: ^11.3.0
audio_session: ^0.2.2
dev_dependencies:
flutter_test: