parent 0411000c03
commit 61972c9ebb
app/main.py (23 changed lines)
@@ -171,6 +171,26 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
 app = FastAPI()
 logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
 
+@app.on_event("startup")
+async def warm_up_mp_pool():
+    """
+    Warm up right at startup:
+    - GPU mode: build the multi-GPU process pool (the official default logic)
+    - CPU mode: run once in a single process, avoiding fork
+    """
+    try:
+        if DEVICE.startswith("cuda"):
+            logger.info("Warm-up (GPU) → pre-building the multi-process pool")
+            _ = model.encode(["warmup"], return_dense=True)  # multi-GPU pool
+        else:
+            logger.info("Warm-up (CPU) → single-process initialization")
+            _ = model.encode(["warmup"], return_dense=True,
+                             num_processes=1)  # no fork
+    except Exception as e:
+        # A failed warm-up does not block service startup; only log a warning
+        logger.warning("Warm-up failed: %s — will fallback at first request", e)
+
+
 class EmbeddingRequest(BaseModel):
     input: Union[str, List[str]]
     model: str = "text-embedding-bge-m3"
@@ -191,8 +211,7 @@ def _encode(texts: List[str]):
             else:
                 # Already fell back to CPU: disable the process pool and never copy weights to the GPU again
                 out = model.encode(t, return_dense=True,
-                                   num_processes=1,
-                                   device="cpu")
+                                   num_processes=1)
             q.put(("ok", out))
         except Exception as e:
             q.put(("err", str(e)))