# faiss_rag_enterprise/scripts/build_index.py
#
# Listing metadata from the original file viewer: 31 lines, 929 B, Python.

import os
import faiss
import numpy as np
from app.core.embedding import embedder
from app.core.config import settings
def load_documents(doc_folder):
    """Read every regular file directly inside ``doc_folder`` as UTF-8 text.

    Sub-directories are skipped; the folder is not walked recursively.

    Files are visited in sorted filename order. ``os.listdir`` returns
    entries in arbitrary, platform-dependent order, so without sorting the
    position of a given document in the returned list (and therefore any
    positional id derived from it) could differ between runs.

    Args:
        doc_folder: Path of the directory holding the documents.

    Returns:
        A list containing the full text of each file, ordered by filename.

    Raises:
        OSError: If ``doc_folder`` or one of its files cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    texts = []
    for fname in sorted(os.listdir(doc_folder)):
        path = os.path.join(doc_folder, fname)
        if not os.path.isfile(path):
            continue  # directories and other non-regular entries are ignored
        with open(path, "r", encoding="utf-8") as f:
            texts.append(f.read())
    return texts
def build_faiss_index(docs, dim):
    """Embed ``docs`` and return a flat inner-product FAISS index over them.

    Args:
        docs: Sequence of document texts; passed as-is to ``embedder.encode``.
        dim: Dimensionality the index is created with. The embeddings must
            have exactly this many columns.

    Returns:
        A ``faiss.IndexFlatIP`` containing one vector per document, added in
        the same order as ``docs``. If ``docs`` is empty the index is
        returned empty and the embedder is not called.

    Raises:
        ValueError: If the embedder output is not a 2-D array of shape
            ``(len(docs), dim)``-compatible width.
    """
    # NOTE(review): IndexFlatIP ranks by raw inner product; that equals cosine
    # similarity only if the embedder returns L2-normalized vectors -- confirm.
    index = faiss.IndexFlatIP(dim)
    if not docs:
        return index

    # FAISS only accepts C-contiguous float32 matrices; the encoder may hand
    # back a list, a float64 array, or a non-contiguous view.
    vectors = np.ascontiguousarray(embedder.encode(docs), dtype=np.float32)
    if vectors.ndim != 2 or vectors.shape[1] != dim:
        raise ValueError(
            f"Embeddings have shape {vectors.shape}, expected (n, {dim})"
        )

    index.add(vectors)
    return index
def main():
    """Load the corpus, build the FAISS index, and write it to disk.

    Reads ``DOC_PATH``, ``EMBEDDING_DIM`` and ``INDEX_FILE`` from
    ``settings``. Exits with a non-zero status if the document folder
    contains no files, since there is nothing to index.
    """
    print("[BUILD] Loading documents...")
    docs = load_documents(settings.DOC_PATH)
    print(f"[BUILD] Loaded {len(docs)} documents")
    if not docs:
        # Fail fast with a clear message instead of an opaque error from the
        # embedder or FAISS further down.
        raise SystemExit(
            f"[BUILD] No documents found in {settings.DOC_PATH}; nothing to index"
        )

    print("[BUILD] Building FAISS index...")
    index = build_faiss_index(docs, settings.EMBEDDING_DIM)

    print(f"[BUILD] Saving index to {settings.INDEX_FILE}")
    # Create the target directory up front so the write cannot fail on a
    # missing folder after the expensive embedding step has already run.
    out_dir = os.path.dirname(settings.INDEX_FILE)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    faiss.write_index(index, settings.INDEX_FILE)


if __name__ == "__main__":
    main()