from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

from app.core.config import settings


class BGEEmbedding:
    """Wraps a BGE sentence-embedding model loaded from settings.MODEL_NAME."""

    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
        self.model = AutoModel.from_pretrained(settings.MODEL_NAME)
        self.model.eval()  # inference only; disables dropout and other training behavior

    def encode(self, texts):
        """Embed a list of strings and return L2-normalized vectors as a NumPy array."""
        with torch.no_grad():
            inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
            outputs = self.model(**inputs)
            # Pool token embeddings into one vector per input, then normalize to unit length.
            embeddings = self.mean_pooling(outputs, inputs['attention_mask'])
            embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
            return embeddings.cpu().numpy()

    def mean_pooling(self, model_output, attention_mask):
        # Average token embeddings over the sequence, ignoring padding positions
        # via the attention mask; clamp avoids division by zero for empty masks.
        token_embeddings = model_output[0]
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


embedder = BGEEmbedding()
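
# --- Example usage (illustrative sketch, not part of the original module) ---
# Assumes settings.MODEL_NAME points to a BGE checkpoint such as
# "BAAI/bge-small-en-v1.5"; the import path below is hypothetical and depends
# on where this file lives in the project.
#
#     from app.services.bge_embedding import embedder  # hypothetical module path
#
#     vectors = embedder.encode(["What is BGE?", "A family of text embedding models."])
#     print(vectors.shape)              # (2, hidden_size); rows are L2-normalized
#     similarity = vectors @ vectors.T  # dot product equals cosine similarity here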