From cea0402f14284e5ab19c6f16ee19ace0ff6004d2 Mon Sep 17 00:00:00 2001
From: hailin
Date: Tue, 5 Aug 2025 14:39:11 +0800
Subject: [PATCH] Force CPU-only warm-up: mask CUDA and pin BGEM3 devices to CPU

---
 app/main.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/app/main.py b/app/main.py
index d1f4044..8535706 100644
--- a/app/main.py
+++ b/app/main.py
@@ -89,6 +89,10 @@ def load_model(device: str):
 
     mdl = BGEM3FlagModel(MODEL_PATH, use_fp16=use_fp16, device=device)
 
+    if device == "cpu":
+        # 屏蔽 GPU,让后续 torch / BGEM3 都认不出 CUDA
+        os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
     # Simple DataParallel for multi-GPU inference
     if device.startswith("cuda") and torch.cuda.device_count() > 1:
         logger.info(
@@ -173,25 +177,21 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
 
 @app.on_event("startup")
 async def warm_up_mp_pool():
-    """
-    GPU: 建立多进程池预热
-    CPU: 单进程预热,但先临时把 torch 看到的 GPU 设为 0 张
-    """
     try:
         if DEVICE.startswith("cuda"):
-            logger.info("Warm-up (GPU) → 预生成多进程池")
-            _ = model.encode(["warmup"], return_dense=True)  # 多 GPU
+            logger.info("Warm-up (GPU) → 建多进程池")
+            _ = model.encode(["warmup"], return_dense=True)  # GPU
         else:
             logger.info("Warm-up (CPU) → 单进程初始化")
-            # --- 关键 3 行 ---------------------------------------------------
-            orig_cnt = torch.cuda.device_count  # 保存原函数
-            torch.cuda.device_count = lambda: 0  # 伪装无 GPU
+            # 双保险:彻底把 BGEM3 的设备表改成只含 CPU
+            if hasattr(model, "devices"):
+                model.devices = ["cpu"]
+            model.device = "cpu"
             _ = model.encode(["warmup"],
                              return_dense=True,
-                             num_processes=1)  # 单进程
-            torch.cuda.device_count = orig_cnt  # 恢复
-            # ----------------------------------------------------------------
+                             num_processes=1)  # 不再 fork
     except Exception as e:
-        logger.warning("Warm-up failed: %s — 将在首条请求时再退避", e)
+        logger.warning("Warm-up failed: %s —— 首条请求时再退避", e)
+
 
 class EmbeddingRequest(BaseModel):