hailin 2025-08-05 15:01:43 +08:00
parent f5da202b4a
commit 357deccf86
1 changed file with 13 additions and 7 deletions

@@ -179,14 +179,16 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
 async def warm_up_mp_pool():
     try:
         if DEVICE.startswith("cuda"):
-            logger.info("Warm-up (GPU) → pre-building multiprocess pool")
+            logger.info("Warm-up (GPU) → multiprocess pool")
             _ = model.encode(["warmup"], return_dense=True)
         else:
             logger.info("Warm-up (CPU) → single-process init")
-            if hasattr(model, "devices"):
-                model.devices = ["cpu"]  # hide the GPU entirely
-            model.device = "cpu"
-            _ = model.encode(["warmup"], return_dense=True)  # ← dropped num_processes
+            # ── temporarily make the library think there is no GPU ──────────
+            orig_cnt = torch.cuda.device_count
+            torch.cuda.device_count = lambda: 0
+            _ = model.encode(["warmup"], return_dense=True)  # no num_processes
+            torch.cuda.device_count = orig_cnt
+            # ────────────────────────────────────────────────────────────────
     except Exception as e:
         logger.warning("Warm-up failed: %s; will back off on the first request", e)
@@ -207,9 +209,13 @@ def _encode(texts: List[str]):
     def _worker(t, q):
         try:
             if DEVICE.startswith("cuda"):
-                out = model.encode(t, return_dense=True)
+                out = model.encode(t, return_dense=True)  # GPU path runs as usual
             else:
-                out = model.encode(t, return_dense=True)  # ← likewise no num_processes
+                # temporarily hide the GPU; single-process CPU inference
+                orig_cnt = torch.cuda.device_count
+                torch.cuda.device_count = lambda: 0
+                out = model.encode(t, return_dense=True)  # no num_processes
+                torch.cuda.device_count = orig_cnt
             q.put(("ok", out))
         except Exception as e:
             q.put(("err", str(e)))