fix: filter thinking content and deduplicate SSE chunks in AntafLLM

Ant Afu returns its internal reasoning/thinking process mixed with the actual response text, causing TTS to read the internal monologue aloud. This change also stops duplicate text chunks from being sent repeatedly.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 20:05:42 -07:00 · 2026-04-05 20:05:42 -07:00 · f461e341ba
parent d399a21f23
commit f461e341ba
1 changed files with 28 additions and 2 deletions
--- a/backend/main/xiaozhi-server/core/providers/llm/antaf/antaf.py
+++ b/backend/main/xiaozhi-server/core/providers/llm/antaf/antaf.py
@ -20,6 +20,22 @@ class LLMProvider(LLMProviderBase):
            f"AntafLLM 初始化: bridge={self.bridge_url}, timeout={self.timeout}s"
        )

+    @staticmethod
+    def _is_thinking(text):
+        """Return True if text contains a marker phrase of Ant Afu's inner thinking/reasoning, which should not be sent to the user."""
+        thinking_patterns = [  # marker phrases; each is tested as a plain substring below
+            "用户问", "用户说", "用户的", "用户可能", "用户真正",  # third-person references to "the user"
+            "我得", "我会", "我在想", "我决定", "我要",  # first-person planning — NOTE(review): "我会"/"我要" may also occur in ordinary replies; confirm false-positive rate
+            "语气比较", "感觉他", "让他知道", "让他觉得",  # commentary on tone / on how "he" should feel
+            "先安抚", "得先", "不想表现",  # describing a reply strategy
+            "整体语气", "这样能", "这样他",  # justifying the chosen wording
+            "所以我", "还带了个",  # narrating what was done
+        ]
+        for p in thinking_patterns:
+            if p in text:  # a single marker anywhere in the text is enough
+                return True
+        return False  # no marker found: treat the text as user-facing
+
+
    def response(self, session_id, dialogue, **kwargs):
        # 提取最后一条用户消息
        query = ""
@ -45,6 +61,7 @@ class LLMProvider(LLMProviderBase):
            )
            resp.encoding = "utf-8"

+            seen_texts = set()
            for line in resp.iter_lines(decode_unicode=True):
                if not line:
                    continue
@ -52,7 +69,16 @@ class LLMProvider(LLMProviderBase):
                    data = line[6:]
                    if data == "[DONE]":
                        break
-                    if data and len(data.strip()) > 0:
+                    if not data or len(data.strip()) == 0:
+                        continue
+                    # 去重：跳过完全相同的文本块
+                    if data in seen_texts:
+                        continue
+                    seen_texts.add(data)
+                    # 过滤思考过程
+                    if self._is_thinking(data):
+                        logger.bind(tag=TAG).debug(f"过滤思考内容: {data[:50]}...")
+                        continue
                    yield data

        except requests.exceptions.ConnectionError: