hailin 2025-08-05 14:52:39 +08:00
parent cea0402f14
commit f5da202b4a
1 changed file with 6 additions and 11 deletions


@@ -92,7 +92,7 @@ def load_model(device: str):
     if device == "cpu":
         # Hide the GPU so that neither torch nor BGEM3 can see CUDA
         os.environ["CUDA_VISIBLE_DEVICES"] = ""
     # Simple DataParallel for multi-GPU inference
     if device.startswith("cuda") and torch.cuda.device_count() > 1:
         logger.info(
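Masking the GPU through `CUDA_VISIBLE_DEVICES` only works if it happens before the process first initializes CUDA; a minimal standalone sketch of the idea (the helper name `select_device` is illustrative, not code from this repo):

```python
import os

def select_device(requested: str) -> str:
    """Illustrative helper (not in the diff): pick a device, masking CUDA for CPU runs."""
    if requested == "cpu":
        # Must be set before the process first initializes CUDA; once torch
        # has created a CUDA context, changing this variable has no effect.
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    import torch  # local import keeps the example self-contained

    if requested.startswith("cuda") and torch.cuda.is_available():
        return requested
    return "cpu"
```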
@@ -179,16 +179,14 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
 async def warm_up_mp_pool():
     try:
         if DEVICE.startswith("cuda"):
-            logger.info("Warm-up (GPU) → multi-process pool")
-            _ = model.encode(["warmup"], return_dense=True)  # GPU
+            logger.info("Warm-up (GPU) → pre-spawning the multi-process pool")
+            _ = model.encode(["warmup"], return_dense=True)
         else:
             logger.info("Warm-up (CPU) → single-process initialization")
             # Belt and braces: force BGEM3's device table to CPU only
             if hasattr(model, "devices"):
-                model.devices = ["cpu"]
+                model.devices = ["cpu"]  # hide the GPU completely
             model.device = "cpu"
-            _ = model.encode(["warmup"], return_dense=True,
-                             num_processes=1)  # no more forking
+            _ = model.encode(["warmup"], return_dense=True)  # ← num_processes dropped
     except Exception as e:
         logger.warning("Warm-up failed: %s; will fall back on the first request", e)
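Since `warm_up_mp_pool` is async, it is presumably scheduled once at service start-up; a minimal sketch assuming the service is a FastAPI app (the framework is an assumption, this diff does not show it):

```python
from contextlib import asynccontextmanager

from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Pay the model / process-pool start-up cost once, before traffic arrives.
    await warm_up_mp_pool()
    yield

app = FastAPI(lifespan=lifespan)
```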
@@ -209,12 +207,9 @@ def _encode(texts: List[str]):
     def _worker(t, q):
         try:
             if DEVICE.startswith("cuda"):
-                # Normal GPU path; the multi-process pool works as usual
                 out = model.encode(t, return_dense=True)
             else:
-                # Already fell back to CPU: disable the pool, never copy weights to the GPU again
-                out = model.encode(t, return_dense=True,
-                                   num_processes=1)
+                out = model.encode(t, return_dense=True)  # ← likewise, no num_processes
             q.put(("ok", out))
         except Exception as e:
             q.put(("err", str(e)))