diff --git a/packages/services/voice-agent/src/plugins/agent_llm.py b/packages/services/voice-agent/src/plugins/agent_llm.py
index 3bbae12..e6939fe 100644
--- a/packages/services/voice-agent/src/plugins/agent_llm.py
+++ b/packages/services/voice-agent/src/plugins/agent_llm.py
@@ -211,29 +211,28 @@ class AgentServiceLLMStream(llm.LLMStream):
             # 2. Create agent task (with timeout)
             engine_type = self._llm_instance._engine_type
-            prompt = user_text
-
-            # Agent SDK mode: instruct the agent to output concise spoken Chinese
-            # (skip tool-call details and intermediate steps)
-            if engine_type == "claude_agent_sdk":
-                prompt = (
-                    "【语音对话模式】你正在通过语音与用户实时对话。请严格遵守以下规则:\n"
-                    "1. 只输出用户关注的最终答案,不要输出工具调用过程、中间步骤或技术细节\n"
-                    "2. 用简洁自然的口语中文回答,像面对面对话一样\n"
-                    "3. 回复要简短精炼,适合语音播报,通常1-3句话即可\n"
-                    "4. 不要使用markdown格式、代码块、列表符号等文本格式\n"
-                    f"\n用户说:{user_text}"
-                )
 
             # Voice mode flag: tell agent-service to filter intermediate events
             # (tool_use, tool_result, thinking) — only stream text + completed + error
             voice_mode = engine_type == "claude_agent_sdk"
 
             body: dict[str, Any] = {
-                "prompt": prompt,
+                "prompt": user_text,  # always send clean user text (no wrapping)
                 "engineType": engine_type,
                 "voiceMode": voice_mode,
             }
+
+            # Agent SDK voice mode: rules go in systemPrompt, not wrapped into each prompt,
+            # so history stays clean. NOTE(review): sent on every request — confirm intended.
+            if voice_mode:
+                body["systemPrompt"] = (
+                    "你正在通过语音与用户实时对话。请严格遵守以下规则:\n"
+                    "1. 只输出用户关注的最终答案,不要输出工具调用过程、中间步骤或技术细节\n"
+                    "2. 用简洁自然的口语中文回答,像面对面对话一样\n"
+                    "3. 回复要简短精炼,适合语音播报,通常1-3句话即可\n"
+                    "4. 不要使用markdown格式、代码块、列表符号等文本格式"
+                )
+
             if self._llm_instance._agent_session_id:
                 body["sessionId"] = self._llm_instance._agent_session_id