feat: enable token-level streaming and fix duplicate message bubble
Backend:
- Add includePartialMessages: true to SDK query options
- Handle stream_event/content_block_delta for real-time text streaming
- Skip text/thinking blocks from complete assistant messages (already streamed via deltas) to avoid duplication
- Change default result summary to empty string

Flutter:
- Only show CompletedEvent summary when no assistant text was streamed (prevents duplicate message bubble)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5f827b0961
commit
74be945e4a
|
|
@ -230,7 +230,12 @@ class ChatNotifier extends StateNotifier<ChatState> {
|
|||
);
|
||||
|
||||
case CompletedEvent(:final summary):
|
||||
if (summary.isNotEmpty) {
|
||||
// Only show summary as a message if there were no assistant text messages
|
||||
// (avoids duplicate bubble when the SDK already streamed the full response)
|
||||
final hasAssistantText = state.messages.any(
|
||||
(m) => m.role == MessageRole.assistant && m.type == MessageType.text && m.content.isNotEmpty,
|
||||
);
|
||||
if (summary.isNotEmpty && !hasAssistantText) {
|
||||
_appendOrUpdateAssistantMessage(summary, MessageType.text);
|
||||
}
|
||||
state = state.copyWith(agentStatus: AgentStatus.idle);
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
|
|||
maxBudgetUsd: params.maxBudgetUsd,
|
||||
env,
|
||||
abortController,
|
||||
includePartialMessages: true,
|
||||
allowDangerouslySkipPermissions: true,
|
||||
permissionMode: 'bypassPermissions',
|
||||
stderr: (data: string) => {
|
||||
|
|
@ -273,6 +274,7 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
|
|||
prompt: message,
|
||||
options: {
|
||||
resume: sdkSessionId,
|
||||
includePartialMessages: true,
|
||||
env,
|
||||
abortController,
|
||||
allowDangerouslySkipPermissions: true,
|
||||
|
|
@ -352,15 +354,28 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
|
|||
private mapSdkMessage(message: any): EngineStreamEvent[] {
|
||||
const events: EngineStreamEvent[] = [];
|
||||
|
||||
if (message.type === 'assistant') {
|
||||
if (message.type === 'stream_event') {
|
||||
// Token-level streaming from includePartialMessages: true
|
||||
const streamEvent = message.event;
|
||||
if (!streamEvent) return events;
|
||||
|
||||
if (streamEvent.type === 'content_block_delta') {
|
||||
const delta = streamEvent.delta;
|
||||
if (delta?.type === 'text_delta' && delta.text) {
|
||||
events.push({ type: 'text', content: delta.text });
|
||||
} else if (delta?.type === 'thinking_delta' && delta.thinking) {
|
||||
events.push({ type: 'thinking', content: delta.thinking });
|
||||
}
|
||||
}
|
||||
// Ignore content_block_start/stop/message_start/message_stop — they are structural
|
||||
} else if (message.type === 'assistant') {
|
||||
// Complete assistant message (also emitted after streaming)
|
||||
// When streaming is enabled, skip full blocks to avoid duplicate text
|
||||
// Only forward tool_use and tool_result which aren't streamed as deltas
|
||||
const content = message.message?.content;
|
||||
if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (block.type === 'thinking') {
|
||||
events.push({ type: 'thinking', content: block.thinking ?? '' });
|
||||
} else if (block.type === 'text') {
|
||||
events.push({ type: 'text', content: block.text ?? '' });
|
||||
} else if (block.type === 'tool_use') {
|
||||
if (block.type === 'tool_use') {
|
||||
events.push({
|
||||
type: 'tool_use',
|
||||
toolName: block.name ?? 'unknown',
|
||||
|
|
@ -376,12 +391,13 @@ export class ClaudeAgentSdkEngine implements AgentEnginePort {
|
|||
isError: block.is_error ?? false,
|
||||
});
|
||||
}
|
||||
// text and thinking blocks are already streamed via stream_event deltas
|
||||
}
|
||||
}
|
||||
} else if (message.type === 'result') {
|
||||
events.push({
|
||||
type: 'completed',
|
||||
summary: message.result ?? 'Task completed',
|
||||
summary: message.result ?? '',
|
||||
tokensUsed: message.usage
|
||||
? (message.usage.input_tokens ?? 0) + (message.usage.output_tokens ?? 0)
|
||||
: undefined,
|
||||
|
|
|
|||
Loading…
Reference in New Issue