from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

from app.core.config import settings


class BGEEmbedding:
    """Wraps a BGE-style Hugging Face model for sentence embeddings."""

    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
        self.model = AutoModel.from_pretrained(settings.MODEL_NAME)
        self.model.eval()  # inference only: disable dropout and other training-time layers

    def encode(self, texts):
        """Embed a list of strings and return L2-normalised vectors as a NumPy array."""
        with torch.no_grad():
            inputs = self.tokenizer(
                texts, padding=True, truncation=True, return_tensors="pt"
            )
            outputs = self.model(**inputs)
            # Pool token embeddings into one vector per input, then normalise
            # so that dot product equals cosine similarity.
            embeddings = self.mean_pooling(outputs, inputs["attention_mask"])
            embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
            return embeddings.cpu().numpy()

    def mean_pooling(self, model_output, attention_mask):
        """Average the last hidden states over real (non-padding) tokens."""
        token_embeddings = model_output[0]  # last_hidden_state: (batch, seq_len, hidden)
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )


# Module-level singleton so the model is loaded once and shared across imports.
embedder = BGEEmbedding()
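

# Usage sketch, not part of the original module: assumes settings.MODEL_NAME
# points at a sentence-embedding checkpoint (e.g. "BAAI/bge-small-en-v1.5");
# the example texts below are illustrative only.
if __name__ == "__main__":
    vectors = embedder.encode(
        ["hello world", "embedding models map text to vectors"]
    )
    print(vectors.shape)  # (2, hidden_size); each row is L2-normalised
    print(vectors @ vectors.T)  # pairwise cosine similarities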