From e16ec7930dfd98592c8c002d81aabd5deee4b371 Mon Sep 17 00:00:00 2001 From: hailin Date: Fri, 6 Feb 2026 22:58:19 -0800 Subject: [PATCH] feat(knowledge): add file upload with text extraction for knowledge base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 支持在管理后台知识库页面上传文件(PDF、Word、TXT、Markdown), 自动提取文本内容,管理员预览编辑后保存为知识库文章。 ## 后端 (knowledge-service) - 新增 TextExtractionService:文件文本提取服务 - PDF 提取:使用 pdf-parse v2 (PDFParse class API) - Word (.docx) 提取:使用 mammoth.extractRawText() - TXT/Markdown:直接 UTF-8 解码 - 支持中英文混合字数统计 - 文件大小限制 200MB,类型校验(MIME 白名单) - 空文本 PDF(扫描件/图片)返回友好错误提示 - 新增上传接口:POST /knowledge/articles/upload - 使用 NestJS FileInterceptor 处理 multipart/form-data - 仅提取文本并返回,不直接创建文章(两步流程) - 返回:extractedText, suggestedTitle, wordCount, pageCount - 新增 ExtractedTextResponse DTO - KnowledgeModule 注册 TextExtractionService ## 前端 (admin-client) - knowledge.api.ts:新增 uploadFile() 方法(FormData + 120s 超时) - useKnowledge.ts:新增 useUploadKnowledgeFile hook - KnowledgePage.tsx: - 新增 Segmented 切换器(手动输入 / 文件上传),仅新建时显示 - 文件上传模式显示 Upload.Dragger 拖拽上传区域 - 上传后自动提取文本,填入标题+内容字段 - 提取完成自动切回手动模式,管理员可预览编辑后保存 - 显示提取结果(字数、页数) ## 用户流程 新建文章 → 切换"文件上传" → 拖入/选择文件 → 系统提取文本 → 自动填入标题+内容 → 管理员编辑确认 → 点击保存 ## 依赖 - pdf-parse@^2.4.5(PDF 文本提取) - mammoth@^1.8.0(Word 文档文本提取) Co-Authored-By: Claude Opus 4.6 --- .../features/knowledge/application/index.ts | 1 + .../knowledge/application/useKnowledge.ts | 9 + .../knowledge/infrastructure/index.ts | 2 +- .../knowledge/infrastructure/knowledge.api.ts | 17 ++ .../presentation/pages/KnowledgePage.tsx | 77 +++++- .../services/knowledge-service/package.json | 5 +- .../adapters/inbound/knowledge.controller.ts | 37 +++ .../src/application/dtos/knowledge.dto.ts | 7 + .../services/text-extraction.service.ts | 135 +++++++++++ .../src/knowledge/knowledge.module.ts | 2 + pnpm-lock.yaml | 229 +++++++++++++++++- 11 files changed, 506 insertions(+), 15 deletions(-) create mode 100644 
packages/services/knowledge-service/src/application/services/text-extraction.service.ts diff --git a/packages/admin-client/src/features/knowledge/application/index.ts b/packages/admin-client/src/features/knowledge/application/index.ts index 4eef869..58071a6 100644 --- a/packages/admin-client/src/features/knowledge/application/index.ts +++ b/packages/admin-client/src/features/knowledge/application/index.ts @@ -5,5 +5,6 @@ export { useDeleteArticle, usePublishArticle, useUnpublishArticle, + useUploadKnowledgeFile, KNOWLEDGE_QUERY_KEY, } from './useKnowledge'; diff --git a/packages/admin-client/src/features/knowledge/application/useKnowledge.ts b/packages/admin-client/src/features/knowledge/application/useKnowledge.ts index cdbe0bb..280beb2 100644 --- a/packages/admin-client/src/features/knowledge/application/useKnowledge.ts +++ b/packages/admin-client/src/features/knowledge/application/useKnowledge.ts @@ -59,6 +59,15 @@ export function usePublishArticle() { }); } +export function useUploadKnowledgeFile() { + return useMutation({ + mutationFn: (file: File) => knowledgeApi.uploadFile(file), + onError: () => { + message.error('文件上传失败'); + }, + }); +} + export function useUnpublishArticle() { const queryClient = useQueryClient(); diff --git a/packages/admin-client/src/features/knowledge/infrastructure/index.ts b/packages/admin-client/src/features/knowledge/infrastructure/index.ts index 73f1af9..f5938a7 100644 --- a/packages/admin-client/src/features/knowledge/infrastructure/index.ts +++ b/packages/admin-client/src/features/knowledge/infrastructure/index.ts @@ -1,2 +1,2 @@ export { knowledgeApi } from './knowledge.api'; -export type { Article, ArticleListResponse, CreateArticleParams, UpdateArticleParams } from './knowledge.api'; +export type { Article, ArticleListResponse, CreateArticleParams, UpdateArticleParams, ExtractedTextResponse } from './knowledge.api'; diff --git a/packages/admin-client/src/features/knowledge/infrastructure/knowledge.api.ts 
b/packages/admin-client/src/features/knowledge/infrastructure/knowledge.api.ts index 6e2dc9c..23c85a8 100644 --- a/packages/admin-client/src/features/knowledge/infrastructure/knowledge.api.ts +++ b/packages/admin-client/src/features/knowledge/infrastructure/knowledge.api.ts @@ -26,6 +26,13 @@ export interface CreateArticleParams { tags?: string[]; } +export interface ExtractedTextResponse { + extractedText: string; + suggestedTitle: string; + wordCount: number; + pageCount?: number; +} + export interface UpdateArticleParams extends Partial<CreateArticleParams> { id: string; } @@ -59,4 +66,14 @@ export const knowledgeApi = { unpublishArticle: async (id: string): Promise<void> => { await api.post(`/knowledge/articles/${id}/unpublish`); }, + + uploadFile: async (file: File): Promise<ExtractedTextResponse> => { + const formData = new FormData(); + formData.append('file', file); + const response = await api.post('/knowledge/articles/upload', formData, { + headers: { 'Content-Type': 'multipart/form-data' }, + timeout: 120000, + }); + return response.data.data; + }, }; diff --git a/packages/admin-client/src/features/knowledge/presentation/pages/KnowledgePage.tsx b/packages/admin-client/src/features/knowledge/presentation/pages/KnowledgePage.tsx index 0d3c360..1706bfe 100644 --- a/packages/admin-client/src/features/knowledge/presentation/pages/KnowledgePage.tsx +++ b/packages/admin-client/src/features/knowledge/presentation/pages/KnowledgePage.tsx @@ -12,6 +12,9 @@ import { Popconfirm, Typography, Drawer, + Upload, + Segmented, + message, } from 'antd'; import { PlusOutlined, @@ -21,6 +24,8 @@ import { EyeOutlined, CheckOutlined, StopOutlined, + InboxOutlined, + UploadOutlined, } from '@ant-design/icons'; import { useKnowledgeArticles, @@ -29,6 +34,7 @@ import { useDeleteArticle, usePublishArticle, useUnpublishArticle, + useUploadKnowledgeFile, } from '../../application'; import type { Article, CreateArticleParams } from '../../infrastructure'; @@ -51,6 +57,8 @@ export function KnowledgePage() { const [isModalOpen, 
setIsModalOpen] = useState(false); const [isDrawerOpen, setIsDrawerOpen] = useState(false); const [selectedArticle, setSelectedArticle] = useState<Article | null>(null); + const [inputMode, setInputMode] = useState<'manual' | 'upload'>('manual'); + const [isExtracting, setIsExtracting] = useState(false); const [form] = Form.useForm(); const { data, isLoading } = useKnowledgeArticles(categoryFilter); @@ -59,6 +67,7 @@ export function KnowledgePage() { const deleteMutation = useDeleteArticle(); const publishMutation = usePublishArticle(); const unpublishMutation = useUnpublishArticle(); + const uploadMutation = useUploadKnowledgeFile(); const handleEdit = (article: Article) => { setSelectedArticle(article); @@ -98,6 +107,25 @@ export function KnowledgePage() { } }; + const handleFileUpload = (file: File) => { + setIsExtracting(true); + uploadMutation.mutate(file, { + onSuccess: (result) => { + form.setFieldsValue({ + title: result.suggestedTitle, + content: result.extractedText, + }); + const info = result.pageCount + ? `已提取 ${result.wordCount} 字(${result.pageCount} 页)` + : `已提取 ${result.wordCount} 字`; + message.success(info); + setInputMode('manual'); + }, + onSettled: () => setIsExtracting(false), + }); + return false; // prevent default upload + }; + const columns = [ { title: '标题', @@ -234,6 +262,7 @@ export function KnowledgePage() { onClick={() => { setSelectedArticle(null); form.resetFields(); + setInputMode('manual'); setIsModalOpen(true); }} > @@ -287,13 +316,47 @@ export function KnowledgePage() {