From 61972c9ebb1f589ed4a9ab44668581b836d8033b Mon Sep 17 00:00:00 2001
From: hailin
Date: Tue, 5 Aug 2025 13:18:20 +0800
Subject: [PATCH] Warm up the model.encode process pool at startup; drop
 redundant device="cpu" in the CPU fallback encode path

---
 app/main.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/app/main.py b/app/main.py
index 7a05788..7211df7 100644
--- a/app/main.py
+++ b/app/main.py
@@ -171,6 +171,26 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
 app = FastAPI()
 logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
 
+@app.on_event("startup")
+async def warm_up_mp_pool():
+    """
+    启动即预热:
+    - GPU 模式:建立多 GPU 进程池(官方默认逻辑)
+    - CPU 模式:单进程跑一次,避免 fork
+    """
+    try:
+        if DEVICE.startswith("cuda"):
+            logger.info("Warm-up (GPU) → 预生成多进程池")
+            _ = model.encode(["warmup"], return_dense=True)  # 多 GPU 池
+        else:
+            logger.info("Warm-up (CPU) → 单进程初始化")
+            _ = model.encode(["warmup"], return_dense=True,
+                             num_processes=1)  # 禁 fork
+    except Exception as e:
+        # 预热失败不会阻止服务启动,只给警告
+        logger.warning("Warm-up failed: %s — will fallback at first request", e)
+
+
 class EmbeddingRequest(BaseModel):
     input: Union[str, List[str]]
     model: str = "text-embedding-bge-m3"
@@ -191,8 +211,7 @@ def _encode(texts: List[str]):
         else:
             # 已经 fallback 到 CPU:禁用进程池,禁止再向 GPU 拷权重
             out = model.encode(t, return_dense=True,
-                               num_processes=1,
-                               device="cpu")
+                               num_processes=1)
         q.put(("ok", out))
     except Exception as e:
         q.put(("err", str(e)))