From 357deccf86163ea45d33398394243fc2f3fc24d3 Mon Sep 17 00:00:00 2001
From: hailin <hailin@gdzx.xyz>
Date: Tue, 5 Aug 2025 15:01:43 +0800
Subject: [PATCH] Hide GPUs via torch.cuda.device_count during CPU-mode encode

---
 app/main.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/app/main.py b/app/main.py
index e1e306f..868a642 100644
--- a/app/main.py
+++ b/app/main.py
@@ -179,14 +179,16 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
 async def warm_up_mp_pool():
     try:
         if DEVICE.startswith("cuda"):
-            logger.info("Warm-up (GPU) → 预生成多进程池")
+            logger.info("Warm-up (GPU) → 建多进程池")
             _ = model.encode(["warmup"], return_dense=True)
         else:
             logger.info("Warm-up (CPU) → 单进程初始化")
-            if hasattr(model, "devices"):
-                model.devices = ["cpu"]   # 彻底屏蔽 GPU
-            model.device = "cpu"
-            _ = model.encode(["warmup"], return_dense=True)  # ← 删掉 num_processes
+            orig_cnt = torch.cuda.device_count
+            torch.cuda.device_count = lambda: 0  # temporarily make the library see no GPU
+            try:
+                _ = model.encode(["warmup"], return_dense=True)  # num_processes not passed
+            finally:
+                torch.cuda.device_count = orig_cnt  # restore even if encode raises
     except Exception as e:
         logger.warning("Warm-up failed: %s —— 首条请求时再退避", e)
 
@@ -207,9 +209,15 @@ def _encode(texts: List[str]):
     def _worker(t, q):
         try:
             if DEVICE.startswith("cuda"):
-                out = model.encode(t, return_dense=True)
+                out = model.encode(t, return_dense=True)          # GPU path: encode normally
             else:
-                out = model.encode(t, return_dense=True)  # ← 同样不传 num_processes
+                # Temporarily hide GPUs; single-process CPU inference.
+                orig_cnt = torch.cuda.device_count
+                torch.cuda.device_count = lambda: 0
+                try:
+                    out = model.encode(t, return_dense=True)      # num_processes not passed
+                finally:
+                    torch.cuda.device_count = orig_cnt            # restore even if encode raises
             q.put(("ok", out))
         except Exception as e:
             q.put(("err", str(e)))