From 38a9f94b45ffd95efebe6b960c3c17a6218a66c2 Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 10 Mar 2026 10:08:10 -0700 Subject: [PATCH] fix(voice-instance): 3 robustness fixes for OpenClaw voice routing A. terminateVoiceSession: skip voiceSessionManager.terminateSession for instance-mode sessions (no SDK loop was started for them) B. agent.py: call start_voice_session() when instance_id is set regardless of engine_type, so _voice_session_started=True and inject mode is used C. voice/inject: check instance.status === 'running' before firing to bridge All changes are additive; iAgent paths are unchanged. Co-Authored-By: Claude Sonnet 4.6 --- .../rest/controllers/voice-session.controller.ts | 15 ++++++++++++++- packages/services/voice-agent/src/agent.py | 11 +++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/packages/services/agent-service/src/interfaces/rest/controllers/voice-session.controller.ts b/packages/services/agent-service/src/interfaces/rest/controllers/voice-session.controller.ts index c74f061..19d6f06 100644 --- a/packages/services/agent-service/src/interfaces/rest/controllers/voice-session.controller.ts +++ b/packages/services/agent-service/src/interfaces/rest/controllers/voice-session.controller.ts @@ -184,6 +184,13 @@ export class VoiceSessionController { this.gateway.emitStreamEvent(sessionId, { type: 'error', message: '实例不存在' }); return { sessionId, injected: false }; } + if (instance.status !== 'running') { + this.logger.error(`[VoiceInject ${sessionId}] Instance ${instanceId} is ${instance.status}`); + this.gateway.emitStreamEvent(sessionId, { + type: 'error', message: `实例未运行(状态: ${instance.status})`, + }); + return { sessionId, injected: false }; + } if (!instance.serverHost) { this.logger.error(`[VoiceInject ${sessionId}] Instance ${instanceId} has no serverHost`); this.gateway.emitStreamEvent(sessionId, { type: 'error', message: '实例未配置服务器' }); @@ -282,7 +289,13 @@ export class VoiceSessionController { const session = await this.sessionRepository.findById(sessionId); if (session && session.tenantId === tenantId) { - await this.voiceSessionManager.terminateSession(sessionId); + // Only terminate the VoiceSessionManager SDK loop for iAgent sessions. + // Instance mode sessions have no SDK loop — calling terminateSession on a + // non-existent session would be a no-op at best, an error at worst. + const isInstanceSession = !!(session.metadata as Record)?.instanceId; + if (!isInstanceSession) { + await this.voiceSessionManager.terminateSession(sessionId); + } session.status = 'completed'; session.updatedAt = new Date(); diff --git a/packages/services/voice-agent/src/agent.py b/packages/services/voice-agent/src/agent.py index ad2ad13..283b2c8 100644 --- a/packages/services/voice-agent/src/agent.py +++ b/packages/services/voice-agent/src/agent.py @@ -362,10 +362,13 @@ async def entrypoint(ctx: JobContext) -> None: ) # ── Voice session lifecycle ─────────────────────────────────────────── - # For Agent SDK engine: start the long-lived voice session in agent-service. - # This spawns a persistent run loop that accepts injected messages for the - # duration of this call, replacing the per-turn POST /tasks approach. - if engine_type == "claude_agent_sdk": + # Start the long-lived voice session in agent-service when: + # • engine is claude_agent_sdk (iAgent mode), OR + # • an instance_id is set (OpenClaw instance mode — voice/inject routes + # to OpenClaw bridge; voice/start records session but skips SDK loop). + # In both cases _voice_session_started=True ensures _do_stream_voice() is + # used for each speech turn (inject mode) instead of per-turn POST /tasks. + if engine_type == "claude_agent_sdk" or instance_id: started_session_id = await llm.start_voice_session() if started_session_id: logger.info("Long-lived voice session ready: %s", started_session_id)