feat: streaming claude-api engine, engineType override, fix voice test page

- Claude API engine now uses streaming API (messages.stream) for real-time
  text delta output instead of waiting for full response
- Agent controller accepts optional engineType body parameter to allow
  callers (e.g. voice pipeline) to select a specific engine
- Fix voice_test_page.dart compilation error: replace audioplayers (not
  installed) with flutter_sound (already in pubspec.yaml)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-02-24 05:30:11 -08:00
parent 6e832c7615
commit 5d4fd96d43
3 changed files with 70 additions and 25 deletions

View File

@ -4,7 +4,7 @@ import 'package:flutter/material.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import 'package:dio/dio.dart';
import 'package:record/record.dart';
import 'package:audioplayers/audioplayers.dart';
import 'package:flutter_sound/flutter_sound.dart';
import '../../../../core/network/dio_client.dart';
/// Temporary voice I/O test page: TTS + STT + round-trip.
@ -19,8 +19,9 @@ class _VoiceTestPageState extends ConsumerState<VoiceTestPage> {
final _ttsController = TextEditingController(
text: '你好我是IT0运维助手。很高兴为您服务',
);
final _audioPlayer = AudioPlayer();
final _audioPlayer = FlutterSoundPlayer();
final _recorder = AudioRecorder();
bool _playerInitialized = false;
String _ttsStatus = '';
String _sttStatus = '';
@ -46,10 +47,21 @@ class _VoiceTestPageState extends ConsumerState<VoiceTestPage> {
/// Dio for JSON responses (STT).
Dio get _dioJson => ref.read(dioClientProvider);
@override
void initState() {
super.initState();
// Kick off the async flutter_sound player setup. Fire-and-forget:
// _playerInitialized gates playback until openPlayer() completes.
_initPlayer();
}
/// Opens the flutter_sound player session and marks it ready.
///
/// Invoked fire-and-forget from [initState]; errors are caught here so a
/// failed `openPlayer()` does not surface as an unhandled async error.
/// On failure [_playerInitialized] stays false and playback is skipped.
Future<void> _initPlayer() async {
try {
await _audioPlayer.openPlayer();
// Only flip the flag after a successful open.
_playerInitialized = true;
} catch (e) {
debugPrint('Failed to open audio player: $e');
}
}
@override
void dispose() {
// Release UI and audio resources before the State object is destroyed.
_ttsController.dispose();
// NOTE(review): both _audioPlayer.dispose() and closePlayer() appear here —
// looks like a leftover removed line from the audioplayers -> flutter_sound
// migration; FlutterSoundPlayer is torn down with closePlayer(), and it has
// no dispose() member. Confirm only the closePlayer() call is intended.
_audioPlayer.dispose();
// closePlayer() returns a Future; it is deliberately not awaited here
// since dispose() must be synchronous.
if (_playerInitialized) _audioPlayer.closePlayer();
_recorder.dispose();
super.dispose();
}
@ -73,7 +85,7 @@ class _VoiceTestPageState extends ConsumerState<VoiceTestPage> {
setState(() {
_ttsStatus = '完成!耗时 ${sw.elapsedMilliseconds}ms大小 ${(bytes.length / 1024).toStringAsFixed(1)}KB';
});
await _audioPlayer.play(BytesSource(Uint8List.fromList(bytes)));
await _playWavBytes(Uint8List.fromList(bytes));
} catch (e) {
sw.stop();
setState(() => _ttsStatus = '错误: $e');
@ -191,7 +203,7 @@ class _VoiceTestPageState extends ConsumerState<VoiceTestPage> {
_rtResult += '\nTTS (${ttsSw.elapsedMilliseconds}ms): ${(audioBytes.length / 1024).toStringAsFixed(1)}KB';
_rtStatus = '完成STT=${sttSw.elapsedMilliseconds}ms + TTS=${ttsSw.elapsedMilliseconds}ms = ${totalSw.elapsedMilliseconds}ms';
});
await _audioPlayer.play(BytesSource(Uint8List.fromList(audioBytes)));
await _playWavBytes(Uint8List.fromList(audioBytes));
} catch (e) {
totalSw.stop();
setState(() {
@ -200,6 +212,16 @@ class _VoiceTestPageState extends ConsumerState<VoiceTestPage> {
}
}
/// Plays a WAV byte buffer through the flutter_sound player.
///
/// No-ops if the player session has not finished opening yet. Stops any
/// clip that is still playing first — flutter_sound expects stopPlayer()
/// before restarting, and a second TTS/round-trip run can arrive before
/// the previous clip finishes.
Future<void> _playWavBytes(Uint8List wavBytes) async {
if (!_playerInitialized) return;
if (_audioPlayer.isPlaying) {
await _audioPlayer.stopPlayer();
}
await _audioPlayer.startPlayer(
fromDataBuffer: wavBytes,
codec: Codec.pcm16WAV,
whenFinished: () {},
);
}
@override
Widget build(BuildContext context) {
return Scaffold(

View File

@ -95,31 +95,52 @@ export class ClaudeApiEngine implements AgentEnginePort {
requestParams.tools = tools;
}
const response = await client.messages.create(requestParams, {
// Use streaming API for token-level output
const stream = client.messages.stream(requestParams, {
signal: abortController.signal as any,
});
const contentBlocks: AnthropicContentBlock[] = [];
const toolUseBlocks: Array<{ id: string; name: string; input: Record<string, unknown> }> = [];
// Stream text deltas in real-time
for await (const event of stream) {
if (event.type === 'content_block_delta') {
const delta = (event as any).delta;
if (delta?.type === 'text_delta' && delta.text) {
yield { type: 'text' as const, content: delta.text };
} else if (delta?.type === 'thinking_delta' && delta.thinking) {
yield { type: 'thinking' as const, content: delta.thinking };
}
} else if (event.type === 'content_block_start') {
const block = (event as any).content_block;
if (block?.type === 'tool_use') {
contentBlocks.push(block);
}
} else if (event.type === 'content_block_stop') {
// Tool use blocks are fully accumulated at stop
}
}
// Get final message for tool use and usage
const response = await stream.finalMessage();
// Track token usage
if (response.usage) {
totalTokensUsed += (response.usage.input_tokens ?? 0) + (response.usage.output_tokens ?? 0);
}
// Process content blocks from the response
const contentBlocks = response.content as AnthropicContentBlock[];
const toolUseBlocks: Array<{ id: string; name: string; input: Record<string, unknown> }> = [];
for (const block of contentBlocks) {
if (block.type === 'text' && block.text) {
yield { type: 'text', content: block.text };
} else if (block.type === 'tool_use') {
// Collect tool_use blocks from final response
for (const block of response.content) {
if (block.type === 'tool_use') {
yield {
type: 'tool_use',
toolName: block.name!,
type: 'tool_use' as const,
toolName: block.name,
input: (block.input as Record<string, unknown>) ?? {},
};
toolUseBlocks.push({
id: block.id!,
name: block.name!,
id: block.id,
name: block.name,
input: (block.input as Record<string, unknown>) ?? {},
});
}
@ -127,14 +148,13 @@ export class ClaudeApiEngine implements AgentEnginePort {
// Check stop reason
if (response.stop_reason === 'end_turn' || toolUseBlocks.length === 0) {
// Extract final text as summary
const summaryBlock = contentBlocks.find(
(b) => b.type === 'text' && b.text,
const summaryBlock = response.content.find(
(b: any) => b.type === 'text' && b.text,
);
const summary = summaryBlock?.text ?? 'Task completed';
const summary = (summaryBlock as any)?.text ?? 'Task completed';
yield {
type: 'completed',
type: 'completed' as const,
summary,
tokensUsed: totalTokensUsed,
};

View File

@ -25,9 +25,12 @@ export class AgentController {
@Post('tasks')
async executeTask(
@TenantId() tenantId: string,
@Body() body: { prompt: string; systemPrompt?: string; maxTurns?: number; allowedTools?: string[] },
@Body() body: { prompt: string; systemPrompt?: string; maxTurns?: number; allowedTools?: string[]; engineType?: string },
) {
const engine = this.engineRegistry.getActiveEngine();
// Allow callers to override the engine (e.g. voice uses claude_api for streaming)
const engine = body.engineType
? this.engineRegistry.switchEngine(body.engineType as AgentEngineType)
: this.engineRegistry.getActiveEngine();
const session = new AgentSession();
session.id = crypto.randomUUID();