feat: voice mode event filtering — skip tool/thinking events for Agent SDK
1. Remove the on_enter greeting entirely (eliminates the greeting race condition).
2. voice-agent sends voiceMode: true when engine_type is claude_agent_sdk.
3. AgentController.runTaskStream() filters thinking, tool_use, and tool_result events in voice mode — only text, completed, and error reach the client.
4. Detailed logging: each event is logged, with a [FILTERED-voice] tag when it is skipped.
Claude API mode is completely unaffected (voiceMode defaults to false).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7c9fabd891
commit
da17488389
|
|
@ -38,6 +38,7 @@ export class AgentController {
|
||||||
allowedTools?: string[];
|
allowedTools?: string[];
|
||||||
engineType?: string;
|
engineType?: string;
|
||||||
maxContextMessages?: number;
|
maxContextMessages?: number;
|
||||||
|
voiceMode?: boolean;
|
||||||
attachments?: Array<{ base64Data: string; mediaType: string; fileName?: string }>;
|
attachments?: Array<{ base64Data: string; mediaType: string; fileName?: string }>;
|
||||||
},
|
},
|
||||||
) {
|
) {
|
||||||
|
|
@ -118,6 +119,7 @@ export class AgentController {
|
||||||
maxTurns: body.maxTurns || 10,
|
maxTurns: body.maxTurns || 10,
|
||||||
conversationHistory: historyForEngine.length > 0 ? historyForEngine : undefined,
|
conversationHistory: historyForEngine.length > 0 ? historyForEngine : undefined,
|
||||||
resumeSessionId,
|
resumeSessionId,
|
||||||
|
voiceMode: body.voiceMode ?? false,
|
||||||
});
|
});
|
||||||
|
|
||||||
return { sessionId: session.id, taskId: task.id };
|
return { sessionId: session.id, taskId: task.id };
|
||||||
|
|
@ -406,9 +408,17 @@ export class AgentController {
|
||||||
maxTurns: number;
|
maxTurns: number;
|
||||||
conversationHistory?: Array<{ role: 'user' | 'assistant'; content: string | any[] }>;
|
conversationHistory?: Array<{ role: 'user' | 'assistant'; content: string | any[] }>;
|
||||||
resumeSessionId?: string;
|
resumeSessionId?: string;
|
||||||
|
voiceMode?: boolean;
|
||||||
},
|
},
|
||||||
) {
|
) {
|
||||||
const isSdkEngine = engine.engineType === AgentEngineType.CLAUDE_AGENT_SDK;
|
const isSdkEngine = engine.engineType === AgentEngineType.CLAUDE_AGENT_SDK;
|
||||||
|
const voiceMode = params.voiceMode ?? false;
|
||||||
|
/** Event types to suppress in voice mode (only forward text/completed/error) */
|
||||||
|
const voiceFilteredTypes = new Set(['thinking', 'tool_use', 'tool_result']);
|
||||||
|
|
||||||
|
if (voiceMode) {
|
||||||
|
this.logger.log(`[Task ${task.id}] Voice mode ON — filtering ${[...voiceFilteredTypes].join(', ')} events`);
|
||||||
|
}
|
||||||
|
|
||||||
const taskPromise = (async () => {
|
const taskPromise = (async () => {
|
||||||
let finished = false;
|
let finished = false;
|
||||||
|
|
@ -431,8 +441,14 @@ export class AgentController {
|
||||||
|
|
||||||
for await (const event of stream) {
|
for await (const event of stream) {
|
||||||
eventCount++;
|
eventCount++;
|
||||||
this.logger.log(`[Task ${task.id}] Event #${eventCount}: type=${event.type}${event.type === 'text' ? ` len=${(event as any).content?.length}` : ''}${event.type === 'error' ? ` msg=${(event as any).message}` : ''}`);
|
const isFiltered = voiceMode && voiceFilteredTypes.has(event.type);
|
||||||
|
this.logger.log(`[Task ${task.id}] Event #${eventCount}: type=${event.type}${event.type === 'text' ? ` len=${(event as any).content?.length}` : ''}${event.type === 'error' ? ` msg=${(event as any).message}` : ''}${isFiltered ? ' [FILTERED-voice]' : ''}`);
|
||||||
|
|
||||||
|
// In voice mode, skip intermediate events (tool_use, tool_result, thinking)
|
||||||
|
// but still process lifecycle events below (completed/error/approval)
|
||||||
|
if (!isFiltered) {
|
||||||
this.gateway.emitStreamEvent(session.id, event);
|
this.gateway.emitStreamEvent(session.id, event);
|
||||||
|
}
|
||||||
|
|
||||||
// Collect text for assistant message
|
// Collect text for assistant message
|
||||||
if (event.type === 'text') {
|
if (event.type === 'text') {
|
||||||
|
|
|
||||||
|
|
@ -93,14 +93,8 @@ class IT0VoiceAgent(Agent):
|
||||||
)
|
)
|
||||||
|
|
||||||
async def on_enter(self):
|
async def on_enter(self):
|
||||||
"""Called when the agent becomes active — greet the user.
|
"""No greeting — wait for the user to speak first."""
|
||||||
|
pass
|
||||||
Uses session.say() with a static message instead of generate_reply()
|
|
||||||
to avoid triggering the Agent SDK / LLM pipeline for a simple greeting.
|
|
||||||
This prevents a race condition when the user speaks before the
|
|
||||||
greeting LLM task completes.
|
|
||||||
"""
|
|
||||||
self.session.say("你好,我是IT0运维助手,有什么可以帮你的?")
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -225,19 +225,23 @@ class AgentServiceLLMStream(llm.LLMStream):
|
||||||
f"\n用户说:{user_text}"
|
f"\n用户说:{user_text}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Voice mode flag: tell agent-service to filter intermediate events
|
||||||
|
# (tool_use, tool_result, thinking) — only stream text + completed + error
|
||||||
|
voice_mode = engine_type == "claude_agent_sdk"
|
||||||
|
|
||||||
body: dict[str, Any] = {
|
body: dict[str, Any] = {
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"engineType": engine_type,
|
"engineType": engine_type,
|
||||||
|
"voiceMode": voice_mode,
|
||||||
}
|
}
|
||||||
if self._llm_instance._agent_session_id:
|
if self._llm_instance._agent_session_id:
|
||||||
body["sessionId"] = self._llm_instance._agent_session_id
|
body["sessionId"] = self._llm_instance._agent_session_id
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"POST /tasks engine=%s wrapped=%s user_text=%s body.engineType=%s",
|
"POST /tasks engine=%s voiceMode=%s user_text=%s",
|
||||||
engine_type,
|
engine_type,
|
||||||
engine_type == "claude_agent_sdk",
|
voice_mode,
|
||||||
user_text[:80],
|
user_text[:80],
|
||||||
body["engineType"],
|
|
||||||
)
|
)
|
||||||
async with httpx.AsyncClient(
|
async with httpx.AsyncClient(
|
||||||
timeout=httpx.Timeout(connect=10, read=30, write=10, pool=10),
|
timeout=httpx.Timeout(connect=10, read=30, write=10, pool=10),
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue