From 64d81efa69f02056e1878a09f5f89c59b553c622 Mon Sep 17 00:00:00 2001 From: hailin Date: Sat, 10 May 2025 23:35:32 +0800 Subject: [PATCH] . --- app/api/search.py | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/app/api/search.py b/app/api/search.py index 0d59bea..5951221 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -3,10 +3,10 @@ from pydantic import BaseModel from app.core.embedding import embedder from app.core.config import settings from llama_index.vector_stores.faiss import FaissVectorStore -from llama_index import VectorStoreIndex, ServiceContext, StorageContext +from llama_index import VectorStoreIndex, ServiceContext import os import logging -import chardet +import faiss # 引入faiss router = APIRouter() @@ -17,19 +17,6 @@ logger = logging.getLogger(__name__) class QueryRequest(BaseModel): query: str -# 自动检测文件编码并加载 -def read_file_with_detected_encoding(file_path: str): - with open(file_path, "rb") as f: - raw_data = f.read() - result = chardet.detect(raw_data) - encoding = result['encoding'] # 获取检测到的编码 - try: - with open(file_path, "r", encoding=encoding, errors='ignore') as f: - return f.read() - except UnicodeDecodeError: - logger.error(f"UnicodeDecodeError: Unable to decode the file using {encoding}.") - raise HTTPException(status_code=500, detail="Error reading the file due to encoding issues.") - @router.post("/search") def search_docs(request: QueryRequest, user_id: str = Query(..., description="用户ID")): try: @@ -44,25 +31,25 @@ def search_docs(request: QueryRequest, user_id: str = Query(..., description=" logger.error(f"Index not found for user: {user_id} at {index_path}") raise HTTPException(status_code=404, detail="用户索引不存在") - # 创建 StorageContext 并加载 Faiss 向量存储目录 - logger.info(f"Loading Faiss vector store from path: {index_path}") - storage_context = StorageContext.from_defaults(persist_dir=index_path) + # 加载 Faiss 索引 + faiss_index_file = os.path.join(index_path, "index.faiss") # 指定faiss索引文件路径 + if not os.path.exists(faiss_index_file): + logger.error(f"Faiss index not found at {faiss_index_file}") + raise HTTPException(status_code=404, detail="Faiss索引文件未找到") - # 自动读取 docstore.json 文件内容并解码 - docstore_file = os.path.join(index_path, "docstore.json") - if os.path.exists(docstore_file): - file_content = read_file_with_detected_encoding(docstore_file) - logger.info(f"Successfully read docstore.json with detected encoding.") - else: - logger.error(f"docstore.json not found at {docstore_file}") - raise HTTPException(status_code=404, detail="docstore.json not found") + faiss_index = faiss.read_index(faiss_index_file) # 使用faiss加载索引文件 + logger.info("Faiss index loaded successfully.") - # 加载 Faiss 向量存储 - faiss_store = FaissVectorStore.from_persist_path(storage_context) + # 创建 FaissVectorStore 实例 + vector_store = FaissVectorStore(faiss_index=faiss_index) + logger.info("FaissVectorStore created successfully.") + + # 创建 ServiceContext 实例 service_context = ServiceContext.from_defaults(embed_model=embedder, llm=None) logger.info("Service context created successfully.") - index = VectorStoreIndex.from_vector_store(faiss_store, service_context=service_context) + # 创建 VectorStoreIndex + index = VectorStoreIndex.from_vector_store(vector_store, service_context=service_context) logger.info("VectorStoreIndex created successfully.") # 检索结果(真实文本)