Move _worker() out of _encode() to module level; its code is unchanged.
_encode() passes it as the target of mp.Process (presumably so the target is
importable/picklable by the child process -- confirm against the start method).
Context and removed lines are kept verbatim so the hunks still match the file.

diff --git a/app/main.py b/app/main.py
index 6ba53c1..761c096 100644
--- a/app/main.py
+++ b/app/main.py
@@ -212,6 +212,16 @@ class EmbeddingRequest(BaseModel):
     model: str = "text-embedding-bge-m3"
 
 
+# ③ -------- worker used by _encode() as the mp.Process target --------
+def _worker(t, q):
+    try:
+        # out = model.encode(t, return_dense=True) # safe on both GPU and CPU
+        out = model.encode(t, return_dense=True, num_processes=1)
+        q.put(("ok", out))
+    except Exception as e:
+        q.put(("err", str(e)))
+
+
 def _encode(texts: List[str]):
     """
     单次请求:
@@ -219,15 +229,6 @@ def _encode(texts: List[str]):
     2. 若子进程 OOM / CUDA Error → 同一次请求 fallback 到 CPU
     绝不改全局状态,其他并发请求不受影响
     """
-    # ③ -------- _encode() 里 worker 调用 --------
-    def _worker(t, q):
-        try:
-            # out = model.encode(t, return_dense=True) # GPU or CPU 均安全
-            out = model.encode(t, return_dense=True, num_processes=1)
-            q.put(("ok", out))
-        except Exception as e:
-            q.put(("err", str(e)))
-
     q = mp.Queue()
     p = mp.Process(target=_worker, args=(texts, q))
     p.start()