From 0b75bf040ec1c0a109637bc879d39a4f3d0aa54e Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 5 Aug 2025 17:05:14 +0800 Subject: [PATCH] . --- app/main.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/app/main.py b/app/main.py index 6ba53c1..761c096 100644 --- a/app/main.py +++ b/app/main.py @@ -212,6 +212,16 @@ class EmbeddingRequest(BaseModel): model: str = "text-embedding-bge-m3" +# ③ -------- _encode() 里 worker 调用 -------- +def _worker(t, q): + try: + # out = model.encode(t, return_dense=True) # GPU or CPU 均安全 + out = model.encode(t, return_dense=True, num_processes=1) + q.put(("ok", out)) + except Exception as e: + q.put(("err", str(e))) + + def _encode(texts: List[str]): """ 单次请求: @@ -219,15 +229,6 @@ def _encode(texts: List[str]): 2. 若子进程 OOM / CUDA Error → 同一次请求 fallback 到 CPU 绝不改全局状态,其他并发请求不受影响 """ - # ③ -------- _encode() 里 worker 调用 -------- - def _worker(t, q): - try: - # out = model.encode(t, return_dense=True) # GPU or CPU 均安全 - out = model.encode(t, return_dense=True, num_processes=1) - q.put(("ok", out)) - except Exception as e: - q.put(("err", str(e))) - q = mp.Queue() p = mp.Process(target=_worker, args=(texts, q)) p.start()