feat: voice mode event filtering — skip tool/thinking events for Agent SDK
1. Remove the on_enter greeting entirely (no more race condition).
2. voice-agent sends voiceMode: true when engine_type is claude_agent_sdk.
3. AgentController.runTaskStream() filters thinking, tool_use, and tool_result events in voice mode — only text, completed, and error events reach the client.
4. Detailed logging: each event is logged, with a [FILTERED-voice] tag when it is skipped.
Claude API mode is completely unaffected (voiceMode defaults to false).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7c9fabd891
commit
da17488389
|
|
@ -38,6 +38,7 @@ export class AgentController {
|
|||
allowedTools?: string[];
|
||||
engineType?: string;
|
||||
maxContextMessages?: number;
|
||||
voiceMode?: boolean;
|
||||
attachments?: Array<{ base64Data: string; mediaType: string; fileName?: string }>;
|
||||
},
|
||||
) {
|
||||
|
|
@ -118,6 +119,7 @@ export class AgentController {
|
|||
maxTurns: body.maxTurns || 10,
|
||||
conversationHistory: historyForEngine.length > 0 ? historyForEngine : undefined,
|
||||
resumeSessionId,
|
||||
voiceMode: body.voiceMode ?? false,
|
||||
});
|
||||
|
||||
return { sessionId: session.id, taskId: task.id };
|
||||
|
|
@ -406,9 +408,17 @@ export class AgentController {
|
|||
maxTurns: number;
|
||||
conversationHistory?: Array<{ role: 'user' | 'assistant'; content: string | any[] }>;
|
||||
resumeSessionId?: string;
|
||||
voiceMode?: boolean;
|
||||
},
|
||||
) {
|
||||
const isSdkEngine = engine.engineType === AgentEngineType.CLAUDE_AGENT_SDK;
|
||||
const voiceMode = params.voiceMode ?? false;
|
||||
/** Event types to suppress in voice mode (only forward text/completed/error) */
|
||||
const voiceFilteredTypes = new Set(['thinking', 'tool_use', 'tool_result']);
|
||||
|
||||
if (voiceMode) {
|
||||
this.logger.log(`[Task ${task.id}] Voice mode ON — filtering ${[...voiceFilteredTypes].join(', ')} events`);
|
||||
}
|
||||
|
||||
const taskPromise = (async () => {
|
||||
let finished = false;
|
||||
|
|
@ -431,8 +441,14 @@ export class AgentController {
|
|||
|
||||
for await (const event of stream) {
|
||||
eventCount++;
|
||||
this.logger.log(`[Task ${task.id}] Event #${eventCount}: type=${event.type}${event.type === 'text' ? ` len=${(event as any).content?.length}` : ''}${event.type === 'error' ? ` msg=${(event as any).message}` : ''}`);
|
||||
this.gateway.emitStreamEvent(session.id, event);
|
||||
const isFiltered = voiceMode && voiceFilteredTypes.has(event.type);
|
||||
this.logger.log(`[Task ${task.id}] Event #${eventCount}: type=${event.type}${event.type === 'text' ? ` len=${(event as any).content?.length}` : ''}${event.type === 'error' ? ` msg=${(event as any).message}` : ''}${isFiltered ? ' [FILTERED-voice]' : ''}`);
|
||||
|
||||
// In voice mode, skip intermediate events (tool_use, tool_result, thinking)
|
||||
// but still process lifecycle events below (completed/error/approval)
|
||||
if (!isFiltered) {
|
||||
this.gateway.emitStreamEvent(session.id, event);
|
||||
}
|
||||
|
||||
// Collect text for assistant message
|
||||
if (event.type === 'text') {
|
||||
|
|
|
|||
|
|
@ -93,14 +93,8 @@ class IT0VoiceAgent(Agent):
|
|||
)
|
||||
|
||||
async def on_enter(self):
|
||||
"""Called when the agent becomes active — greet the user.
|
||||
|
||||
Uses session.say() with a static message instead of generate_reply()
|
||||
to avoid triggering the Agent SDK / LLM pipeline for a simple greeting.
|
||||
This prevents a race condition when the user speaks before the
|
||||
greeting LLM task completes.
|
||||
"""
|
||||
self.session.say("你好,我是IT0运维助手,有什么可以帮你的?")
|
||||
"""No greeting — wait for the user to speak first."""
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -225,19 +225,23 @@ class AgentServiceLLMStream(llm.LLMStream):
|
|||
f"\n用户说:{user_text}"
|
||||
)
|
||||
|
||||
# Voice mode flag: tell agent-service to filter intermediate events
|
||||
# (tool_use, tool_result, thinking) — only stream text + completed + error
|
||||
voice_mode = engine_type == "claude_agent_sdk"
|
||||
|
||||
body: dict[str, Any] = {
|
||||
"prompt": prompt,
|
||||
"engineType": engine_type,
|
||||
"voiceMode": voice_mode,
|
||||
}
|
||||
if self._llm_instance._agent_session_id:
|
||||
body["sessionId"] = self._llm_instance._agent_session_id
|
||||
|
||||
logger.info(
|
||||
"POST /tasks engine=%s wrapped=%s user_text=%s body.engineType=%s",
|
||||
"POST /tasks engine=%s voiceMode=%s user_text=%s",
|
||||
engine_type,
|
||||
engine_type == "claude_agent_sdk",
|
||||
voice_mode,
|
||||
user_text[:80],
|
||||
body["engineType"],
|
||||
)
|
||||
async with httpx.AsyncClient(
|
||||
timeout=httpx.Timeout(connect=10, read=30, write=10, pool=10),
|
||||
|
|
|
|||
Loading…
Reference in New Issue