From 357deccf86163ea45d33398394243fc2f3fc24d3 Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 5 Aug 2025 15:01:43 +0800 Subject: [PATCH] . --- app/main.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/app/main.py b/app/main.py index e1e306f..868a642 100644 --- a/app/main.py +++ b/app/main.py @@ -179,14 +179,16 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB) async def warm_up_mp_pool(): try: if DEVICE.startswith("cuda"): - logger.info("Warm-up (GPU) → 预生成多进程池") + logger.info("Warm-up (GPU) → 建多进程池") _ = model.encode(["warmup"], return_dense=True) else: logger.info("Warm-up (CPU) → 单进程初始化") - if hasattr(model, "devices"): - model.devices = ["cpu"] # 彻底屏蔽 GPU - model.device = "cpu" - _ = model.encode(["warmup"], return_dense=True) # ← 删掉 num_processes + # ── 临时让库“以为”没有 GPU ──────────────────────────── + orig_cnt = torch.cuda.device_count + torch.cuda.device_count = lambda: 0 + _ = model.encode(["warmup"], return_dense=True) # 不传 num_processes + torch.cuda.device_count = orig_cnt + # ────────────────────────────────────────────────────── except Exception as e: logger.warning("Warm-up failed: %s —— 首条请求时再退避", e) @@ -207,9 +209,13 @@ def _encode(texts: List[str]): def _worker(t, q): try: if DEVICE.startswith("cuda"): - out = model.encode(t, return_dense=True) + out = model.encode(t, return_dense=True) # GPU 正常跑 else: - out = model.encode(t, return_dense=True) # ← 同样不传 num_processes + # 临时屏蔽 GPU,单进程 CPU 推理 + orig_cnt = torch.cuda.device_count + torch.cuda.device_count = lambda: 0 + out = model.encode(t, return_dense=True) # 不传 num_processes + torch.cuda.device_count = orig_cnt q.put(("ok", out)) except Exception as e: q.put(("err", str(e)))