From 21998c07778e87fb3bb1595cf0511dbf690e877e Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 7 Apr 2026 02:15:34 -0700 Subject: [PATCH] fix: filter markdown tables, status text, residual formatting from antaf Co-Authored-By: Claude Opus 4.6 (1M context) --- modules/antaf/antaf_llm.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/modules/antaf/antaf_llm.py b/modules/antaf/antaf_llm.py index 78aecf3..c4e606e 100644 --- a/modules/antaf/antaf_llm.py +++ b/modules/antaf/antaf_llm.py @@ -40,11 +40,12 @@ class LLMProvider(LLMProviderBase): @staticmethod def _clean_text(text): - """清理阿福返回文本中的脏数据、链接、markdown""" + """清理阿福返回文本中的脏数据、链接、markdown、表格""" import re # 去掉阿福内部状态文本 junk = [ "完成资料引用", "内容生成", "正在思考", "正在搜索", + "开始获取资料", "找到资料", ] for j in junk: text = text.replace(j, "") @@ -52,12 +53,21 @@ class LLMProvider(LLMProviderBase): text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # 裸URL text = re.sub(r'https?://\S+', '', text) - # Markdown加粗 **文字** → 文字 - text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text) + # Markdown加粗 **文字** → 文字(包括空的 ****) + text = re.sub(r'\*{2,}([^*]*)\*{2,}', r'\1', text) # Markdown斜体 *文字* → 文字 text = re.sub(r'\*([^*]+)\*', r'\1', text) - # 多余空格 + # 残留星号 + text = text.replace('*', '') + # Markdown表格行 | xxx | xxx | + text = re.sub(r'\|[^|]*\|[^|]*\|[^|\n]*\|?', '', text) + # 表格对齐行 | :--- | :--- | + text = re.sub(r'\|\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?', '', text) + # Markdown标题 ### 文字 + text = re.sub(r'#{1,6}\s*', '', text) + # 多余空格和空行 text = re.sub(r' +', ' ', text) + text = re.sub(r'\n{2,}', '\n', text) return text.strip() @staticmethod