This commit is contained in:
hailin 2025-08-05 17:05:14 +08:00
parent 61962c074b
commit 0b75bf040e
1 changed files with 10 additions and 9 deletions

View File

@ -212,6 +212,16 @@ class EmbeddingRequest(BaseModel):
model: str = "text-embedding-bge-m3" model: str = "text-embedding-bge-m3"
# (3) -------- worker invoked from _encode() --------
def _worker(t, q):
    """Encode ``t`` with the module-level ``model`` and report through ``q``.

    Used as the ``target`` of the ``mp.Process`` started by ``_encode``.
    Exactly one tagged tuple is put on the queue:

    * ``("ok", embeddings)`` when ``model.encode`` succeeds;
    * ``("err", message)`` when any ``Exception`` is raised, where
      ``message`` is ``str()`` of that exception.

    Args:
        t: The texts handed over by ``_encode``.
        q: Queue on which the result tuple is posted.
    """
    try:
        # NOTE(review): num_processes=1 presumably keeps the encoder from
        # spawning its own worker pool inside this process -- confirm
        # against the embedding library's documentation.
        embeddings = model.encode(t, return_dense=True, num_processes=1)
        q.put(("ok", embeddings))
    except Exception as exc:
        # Errors are reported as data instead of being raised, so the
        # consumer of the queue always receives a tagged result.
        failure = ("err", str(exc))
        q.put(failure)
def _encode(texts: List[str]): def _encode(texts: List[str]):
""" """
单次请求 单次请求
@ -219,15 +229,6 @@ def _encode(texts: List[str]):
2. 若子进程 OOM / CUDA Error 同一次请求 fallback CPU 2. 若子进程 OOM / CUDA Error 同一次请求 fallback CPU
绝不改全局状态其他并发请求不受影响 绝不改全局状态其他并发请求不受影响
""" """
# ③ -------- _encode() 里 worker 调用 --------
def _worker(t, q):
try:
# out = model.encode(t, return_dense=True) # GPU or CPU 均安全
out = model.encode(t, return_dense=True, num_processes=1)
q.put(("ok", out))
except Exception as e:
q.put(("err", str(e)))
q = mp.Queue() q = mp.Queue()
p = mp.Process(target=_worker, args=(texts, q)) p = mp.Process(target=_worker, args=(texts, q))
p.start() p.start()