This commit is contained in:
hailin 2025-08-05 13:18:20 +08:00
parent 0411000c03
commit 61972c9ebb
1 changed file with 21 additions and 2 deletions

View File

@@ -171,6 +171,26 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
app = FastAPI()
logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
@app.on_event("startup")
async def warm_up_mp_pool():
    """
    Warm up the model as soon as the application starts.

    - GPU mode: run one throwaway encode so the multi-GPU process pool is
      created up-front (the library's default pooling logic).
    - CPU mode: run one single-process encode to initialize without forking.
    """
    try:
        if DEVICE.startswith("cuda"):
            logger.info("Warm-up (GPU) → 预生成多进程池")
            _ = model.encode(["warmup"], return_dense=True)  # builds the multi-GPU pool
        else:
            logger.info("Warm-up (CPU) → 单进程初始化")
            _ = model.encode(["warmup"], return_dense=True,
                             num_processes=1)  # single process: forbid fork
    except Exception as e:
        # A failed warm-up must not block service startup — warn only;
        # the first real request will fall back and retry.
        logger.warning("Warm-up failed: %s — will fallback at first request", e)
class EmbeddingRequest(BaseModel):
input: Union[str, List[str]]
model: str = "text-embedding-bge-m3"
@@ -191,8 +211,7 @@ def _encode(texts: List[str]):
else:
# 已经 fallback 到 CPU禁用进程池禁止再向 GPU 拷权重
out = model.encode(t, return_dense=True,
num_processes=1,
device="cpu")
num_processes=1)
q.put(("ok", out))
except Exception as e:
q.put(("err", str(e)))