This commit is contained in:
parent
cea0402f14
commit
f5da202b4a
15
app/main.py
15
app/main.py
|
|
@ -179,16 +179,14 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
|
|||
# Warm-up routine: issues a single model.encode(["warmup"]) call, choosing the
# GPU or CPU path from DEVICE; any exception is logged as a warning, not raised.
# NOTE(review): this listing is a rendered diff hunk (-179,16 +179,14); removed
# and added lines appear interleaved without +/- markers and indentation is
# lost, so adjacent near-duplicate statements are old/new versions of one line.
async def warm_up_mp_pool():
|
||||
try:
|
||||
if DEVICE.startswith("cuda"):
|
||||
logger.info("Warm-up (GPU) → 建多进程池")
|
||||
_ = model.encode(["warmup"], return_dense=True) # GPU
|
||||
logger.info("Warm-up (GPU) → 预生成多进程池")
|
||||
_ = model.encode(["warmup"], return_dense=True)
|
||||
else:
|
||||
logger.info("Warm-up (CPU) → 单进程初始化")
|
||||
# Belt and braces: rewrite BGEM3's device list so it contains only the CPU
|
||||
if hasattr(model, "devices"):
|
||||
model.devices = ["cpu"]
|
||||
model.devices = ["cpu"] # completely mask out the GPU
|
||||
model.device = "cpu"
|
||||
_ = model.encode(["warmup"], return_dense=True,
|
||||
num_processes=1) # no more forking
|
||||
_ = model.encode(["warmup"], return_dense=True) # ← num_processes removed
|
||||
except Exception as e:
|
||||
logger.warning("Warm-up failed: %s —— 首条请求时再退避", e)
|
||||
|
||||
|
|
@ -209,12 +207,9 @@ def _encode(texts: List[str]):
|
|||
# Worker: encodes `t` with model.encode(..., return_dense=True) and reports the
# outcome on `q` as ("ok", result) on success or ("err", str(exception)) on failure.
# NOTE(review): rendered diff hunk (-209,12 +207,9) nested under _encode; old and
# new lines are interleaved without +/- markers — confirm against the real file.
def _worker(t, q):
|
||||
try:
|
||||
if DEVICE.startswith("cuda"):
|
||||
# Normal GPU run; the multi-process pool is used as before
|
||||
out = model.encode(t, return_dense=True)
|
||||
else:
|
||||
# Already fell back to CPU: disable the process pool and do not copy weights to the GPU again
|
||||
out = model.encode(t, return_dense=True,
|
||||
num_processes=1)
|
||||
out = model.encode(t, return_dense=True) # ← likewise, num_processes is not passed
|
||||
q.put(("ok", out))
|
||||
except Exception as e:
|
||||
q.put(("err", str(e)))
|
||||
|
|
|
|||
Loading…
Reference in New Issue