From f5da202b4a5e7f50d8b47de5e3e3e163457de255 Mon Sep 17 00:00:00 2001
From: hailin
Date: Tue, 5 Aug 2025 14:52:39 +0800
Subject: [PATCH] .

---
 app/main.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/app/main.py b/app/main.py
index 8535706..e1e306f 100644
--- a/app/main.py
+++ b/app/main.py
@@ -92,7 +92,7 @@ def load_model(device: str):
     if device == "cpu":
         # 屏蔽 GPU,让后续 torch / BGEM3 都认不出 CUDA
         os.environ["CUDA_VISIBLE_DEVICES"] = ""
-    
+
     # Simple DataParallel for multi-GPU inference
     if device.startswith("cuda") and torch.cuda.device_count() > 1:
         logger.info(
@@ -179,16 +179,14 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
 async def warm_up_mp_pool():
     try:
         if DEVICE.startswith("cuda"):
-            logger.info("Warm-up (GPU) → 建多进程池")
-            _ = model.encode(["warmup"], return_dense=True)  # GPU
+            logger.info("Warm-up (GPU) → 预生成多进程池")
+            _ = model.encode(["warmup"], return_dense=True)
         else:
             logger.info("Warm-up (CPU) → 单进程初始化")
-            # 双保险:彻底把 BGEM3 的设备表改成只含 CPU
             if hasattr(model, "devices"):
-                model.devices = ["cpu"]
+                model.devices = ["cpu"]  # 彻底屏蔽 GPU
                 model.device = "cpu"
-            _ = model.encode(["warmup"], return_dense=True,
-                             num_processes=1)  # 不再 fork
+            _ = model.encode(["warmup"], return_dense=True)  # ← 删掉 num_processes
     except Exception as e:
         logger.warning("Warm-up failed: %s —— 首条请求时再退避", e)
 
@@ -209,12 +207,9 @@ def _encode(texts: List[str]):
     def _worker(t, q):
         try:
             if DEVICE.startswith("cuda"):
-                # 正常 GPU 跑,多进程池照旧
                 out = model.encode(t, return_dense=True)
             else:
-                # 已经 fallback 到 CPU:禁用进程池,禁止再向 GPU 拷权重
-                out = model.encode(t, return_dense=True,
-                                   num_processes=1)
+                out = model.encode(t, return_dense=True)  # ← 同样不传 num_processes
             q.put(("ok", out))
         except Exception as e:
             q.put(("err", str(e)))