This commit is contained in:
parent
61972c9ebb
commit
fe580f3615
17
app/main.py
17
app/main.py
|
|
@@ -174,21 +174,24 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
|
||||||
@app.on_event("startup")
async def warm_up_mp_pool():
    """Warm up the embedding model once at application startup.

    GPU mode (``DEVICE`` starts with ``"cuda"``): issue one
    ``model.encode`` call with default settings; per the original author's
    notes this pre-builds the multi-GPU process pool.

    CPU mode: issue one ``model.encode`` call with ``num_processes=1``
    while ``torch.cuda.device_count`` is temporarily replaced by a stub
    that reports 0 devices.

    Any exception raised during warm-up is logged as a warning and
    swallowed, so a failed warm-up never prevents the service from
    starting.
    """
    try:
        if DEVICE.startswith("cuda"):
            logger.info("Warm-up (GPU) → 预生成多进程池")
            model.encode(["warmup"], return_dense=True)  # multi-GPU
        else:
            logger.info("Warm-up (CPU) → 单进程初始化")
            # Pretend no GPU is present for the duration of the warm-up call.
            # NOTE(review): presumably encode() consults
            # torch.cuda.device_count() when deciding whether to spawn
            # workers — confirm against the model library.
            orig_cnt = torch.cuda.device_count  # keep the real function
            torch.cuda.device_count = lambda: 0
            try:
                model.encode(["warmup"], return_dense=True,
                             num_processes=1)  # single process
            finally:
                # Restore unconditionally: without this, an exception in
                # encode() would leave the stub installed and the process
                # would keep seeing zero GPUs after startup.
                torch.cuda.device_count = orig_cnt
    except Exception as e:
        # Best-effort warm-up: report the failure and let startup continue.
        logger.warning("Warm-up failed: %s — 将在首条请求时再退避", e)
|
|
||||||
|
|
||||||
|
|
||||||
class EmbeddingRequest(BaseModel):
|
class EmbeddingRequest(BaseModel):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue