fix(voice-instance): 3 robustness fixes for OpenClaw voice routing

A. terminateVoiceSession: skip voiceSessionManager.terminateSession for
   instance-mode sessions (no SDK loop was started for them)
B. agent.py: call start_voice_session() whenever instance_id is set, regardless
   of engine_type, so that _voice_session_started is True and inject mode is used
C. voice/inject: check that instance.status === 'running' before sending to the OpenClaw bridge

All changes are additive; iAgent paths are unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
hailin 2026-03-10 10:08:10 -07:00
parent 54257ac944
commit 38a9f94b45
2 changed files with 21 additions and 5 deletions

View File

@ -184,6 +184,13 @@ export class VoiceSessionController {
this.gateway.emitStreamEvent(sessionId, { type: 'error', message: '实例不存在' });
return { sessionId, injected: false };
}
if (instance.status !== 'running') {
this.logger.error(`[VoiceInject ${sessionId}] Instance ${instanceId} is ${instance.status}`);
this.gateway.emitStreamEvent(sessionId, {
type: 'error', message: `实例未运行(状态: ${instance.status}`,
});
return { sessionId, injected: false };
}
if (!instance.serverHost) {
this.logger.error(`[VoiceInject ${sessionId}] Instance ${instanceId} has no serverHost`);
this.gateway.emitStreamEvent(sessionId, { type: 'error', message: '实例未配置服务器' });
@ -282,7 +289,13 @@ export class VoiceSessionController {
const session = await this.sessionRepository.findById(sessionId);
if (session && session.tenantId === tenantId) {
await this.voiceSessionManager.terminateSession(sessionId);
// Only terminate the VoiceSessionManager SDK loop for iAgent sessions.
// Instance mode sessions have no SDK loop — calling terminateSession on a
// non-existent session would be a no-op at best, an error at worst.
const isInstanceSession = !!(session.metadata as Record<string, unknown>)?.instanceId;
if (!isInstanceSession) {
await this.voiceSessionManager.terminateSession(sessionId);
}
session.status = 'completed';
session.updatedAt = new Date();

View File

@ -362,10 +362,13 @@ async def entrypoint(ctx: JobContext) -> None:
)
# ── Voice session lifecycle ───────────────────────────────────────────
# For Agent SDK engine: start the long-lived voice session in agent-service.
# This spawns a persistent run loop that accepts injected messages for the
# duration of this call, replacing the per-turn POST /tasks approach.
if engine_type == "claude_agent_sdk":
# Start the long-lived voice session in agent-service when:
# • engine is claude_agent_sdk (iAgent mode), OR
# • an instance_id is set (OpenClaw instance mode — voice/inject routes
# to OpenClaw bridge; voice/start records session but skips SDK loop).
# In both cases _voice_session_started=True ensures _do_stream_voice() is
# used for each speech turn (inject mode) instead of per-turn POST /tasks.
if engine_type == "claude_agent_sdk" or instance_id:
started_session_id = await llm.start_voice_session()
if started_session_id:
logger.info("Long-lived voice session ready: %s", started_session_id)