# Build script: load the documents under settings.DOC_PATH, embed them, and
# save the resulting FAISS index to settings.INDEX_FILE.
import os
|
|
import faiss
|
|
import numpy as np
|
|
from app.core.embedding import embedder
|
|
from app.core.config import settings
|
|
|
|
def load_documents(doc_folder):
    """Read every regular file directly inside ``doc_folder`` as UTF-8 text.

    Entries are visited in sorted filename order so that the position of each
    text in the returned list is reproducible across runs and platforms
    (``os.listdir`` itself returns names in arbitrary order).

    Args:
        doc_folder: Path of the directory to scan. Sub-directories are not
            descended into.

    Returns:
        A list holding the full contents of each file, ordered by filename.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    texts = []
    for fname in sorted(os.listdir(doc_folder)):
        path = os.path.join(doc_folder, fname)
        # Skip sub-directories and any other non-file entries.
        if not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            texts.append(f.read())
    return texts
|
|
|
|
def build_faiss_index(docs, dim):
    """Embed ``docs`` and store the vectors in a flat inner-product FAISS index.

    Args:
        docs: Sequence of document texts handed to ``embedder.encode``.
        dim: Dimensionality of the index; must equal the embedding width.

    Returns:
        A ``faiss.IndexFlatIP`` holding one vector per row produced by the
        embedder. The index is empty when ``docs`` is empty.

    Raises:
        ValueError: If the embeddings are not a 2-D array with ``dim`` columns.
    """
    # NOTE(review): inner product only equals cosine similarity when the
    # embeddings are L2-normalized — confirm the embedder does that.
    index = faiss.IndexFlatIP(dim)

    # Nothing to embed: return the empty index instead of passing an
    # ill-shaped empty array to FAISS.
    if len(docs) == 0:
        return index

    # FAISS expects a C-contiguous float32 matrix of shape (n, dim); coerce
    # here so the result does not depend on what the embedder returns.
    vectors = np.ascontiguousarray(embedder.encode(docs), dtype=np.float32)
    if vectors.ndim != 2 or vectors.shape[1] != dim:
        raise ValueError(
            f"Expected embeddings of shape (n, {dim}), got {vectors.shape}"
        )

    index.add(vectors)
    return index
|
|
|
|
if __name__ == "__main__":
    # Script entry point: read the documents, embed them into a FAISS index,
    # and persist the index to the configured location.
    print("[BUILD] Loading documents...")
    docs = load_documents(settings.DOC_PATH)
    print(f"[BUILD] Loaded {len(docs)} documents")

    print("[BUILD] Building FAISS index...")
    index = build_faiss_index(docs, settings.EMBEDDING_DIM)

    print(f"[BUILD] Saving index to {settings.INDEX_FILE}")
    # Create the target directory first so the freshly built index is not
    # lost to a write failure on a missing folder. abspath keeps dirname
    # non-empty when INDEX_FILE is a bare filename.
    index_dir = os.path.dirname(os.path.abspath(settings.INDEX_FILE))
    os.makedirs(index_dir, exist_ok=True)
    faiss.write_index(index, settings.INDEX_FILE)