hailin 2025-08-05 14:21:14 +08:00
parent 61972c9ebb
commit fe580f3615
1 changed file with 10 additions and 7 deletions


@@ -174,21 +174,24 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
 @app.on_event("startup")
 async def warm_up_mp_pool():
     """
-    Warm up right at startup:
-    - GPU mode: build the multi-GPU process pool (the library's default logic)
-    - CPU mode: run once in a single process, avoiding fork
+    GPU: build the multiprocess pool as a warm-up
+    CPU: warm up in a single process, but first make torch temporarily see 0 GPUs
     """
     try:
         if DEVICE.startswith("cuda"):
             logger.info("Warm-up (GPU) → pre-building the multiprocess pool")
             _ = model.encode(["warmup"], return_dense=True)       # multi-GPU
         else:
             logger.info("Warm-up (CPU) → single-process initialization")
+            # --- the key 3 lines ----------------------------------------------
+            orig_cnt = torch.cuda.device_count                    # save the original function
+            torch.cuda.device_count = lambda: 0                    # pretend there is no GPU
             _ = model.encode(["warmup"], return_dense=True,
-                             num_processes=1)                      # no fork
+                             num_processes=1)                      # single process
+            torch.cuda.device_count = orig_cnt                     # restore
+            # ------------------------------------------------------------------
     except Exception as e:
-        # A failed warm-up does not block service startup; only a warning is logged
-        logger.warning("Warm-up failed: %s — will fallback at first request", e)
+        logger.warning("Warm-up failed: %s — will back off again at the first request", e)
 
 class EmbeddingRequest(BaseModel):
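
Note on the patch above: torch.cuda.device_count is restored only when model.encode() returns normally; if the warm-up call raises, the except branch just logs and torch is left reporting zero GPUs. The following is a minimal sketch, not part of this commit, of the same trick with a try/finally restore. It assumes the same FlagEmbedding-style model.encode(return_dense=..., num_processes=...) call used above; pretend_no_gpu and warm_up_cpu are hypothetical names introduced here for illustration.

from contextlib import contextmanager

import torch


@contextmanager
def pretend_no_gpu():
    """Temporarily make torch report zero CUDA devices, restoring the original function afterwards."""
    orig_device_count = torch.cuda.device_count
    torch.cuda.device_count = lambda: 0                 # pretend there is no GPU
    try:
        yield
    finally:
        torch.cuda.device_count = orig_device_count     # restored even if the body raises


def warm_up_cpu(model):
    # Hypothetical helper mirroring the CPU branch of warm_up_mp_pool()
    with pretend_no_gpu():
        return model.encode(["warmup"], return_dense=True, num_processes=1)

Inside warm_up_mp_pool() this would replace the three save/patch/restore lines with a single "with pretend_no_gpu():" block, keeping the commit's behaviour while making the restore unconditional.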