# Reconstructed from a unified diff of scripts/rag_build_query.py whose lines
# had been collapsed onto a single line. This is the post-patch side of the
# diff's single hunk (@@ -1,9 +1,31 @@).
import os
import logging
from typing import List

import faiss
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    PromptTemplate,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore

from app.core.config import settings  # make sure the config is imported
from scripts.permissions import get_user_allowed_indexes  # make sure permissions are imported

USER_INDEX_PATH = "index_data"
USER_DOC_PATH = "docs"

# Instruction prepended to every query before it is embedded. Kept at module
# level (rather than as a class attribute) so it is shared by both methods
# below without becoming part of the embedding class itself.
# NOTE(review): the original comment says this targets BGE-m3 -- confirm that
# this instruction prefix is the right one for the model actually loaded.
_BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "


class BGEEmbedding(HuggingFaceEmbedding):
    """BGE embedding wrapper that automatically prefixes queries.

    Only the query-side hooks are overridden; every other behavior is
    inherited unchanged from ``HuggingFaceEmbedding``.
    """

    def _get_query_embedding(self, query: str) -> List[float]:
        """Embed one query after prepending the retrieval instruction.

        Args:
            query: Raw query text.

        Returns:
            Whatever the parent's ``_get_query_embedding`` returns for the
            prefixed text.
        """
        return super()._get_query_embedding(_BGE_QUERY_PREFIX + query)

    def _get_query_embeddings(self, queries: List[str]) -> List[List[float]]:
        """Embed several queries, prepending the instruction to each one.

        Args:
            queries: Raw query texts.

        Returns:
            Whatever the parent's ``_get_query_embeddings`` returns for the
            prefixed texts.
        """
        # NOTE(review): this delegates to a parent method of the same name;
        # confirm the installed llama_index base class defines
        # `_get_query_embeddings`, otherwise this raises AttributeError.
        prefixed = [_BGE_QUERY_PREFIX + text for text in queries]
        return super()._get_query_embeddings(prefixed)


# The hunk ends with the two unchanged context lines recorded below. The
# function body is not part of the diff, so only its header is kept here (as a
# comment, so that no body is invented).
#
# def build_user_index(user_id: str):
#     # set up logging