diff --git a/backend/main/xiaozhi-server/core/handle/sendAudioHandle.py b/backend/main/xiaozhi-server/core/handle/sendAudioHandle.py index 2870e67..53c53ad 100644 --- a/backend/main/xiaozhi-server/core/handle/sendAudioHandle.py +++ b/backend/main/xiaozhi-server/core/handle/sendAudioHandle.py @@ -44,15 +44,18 @@ async def sendAudioMessage(conn: "ConnectionHandler", sentenceType, audios, text # 发送结束消息(如果是最后一个文本) if sentenceType == SentenceType.LAST: - await send_tts_message(conn, "stop", None) - conn.client_is_speaking = False - # Send system idle command if LLM requested it (e.g. antaf bridge unavailable) if getattr(conn, 'send_idle_after_tts', False): + # Send system idle instead of tts stop — goes Speaking→Idle directly, + # avoids Speaking→Listening which triggers SendStartListening conn.send_idle_after_tts = False + conn.client_is_speaking = False await conn.websocket.send( json.dumps({"type": "system", "command": "idle"}) ) - conn.logger.bind(tag=TAG).info("Sent system idle to device") + conn.logger.bind(tag=TAG).info("Sent system idle to device (skip tts stop)") + else: + await send_tts_message(conn, "stop", None) + conn.client_is_speaking = False if conn.close_after_chat: await conn.close()