This commit is contained in:
parent
f5da202b4a
commit
357deccf86
20
app/main.py
20
app/main.py
|
|
@ -179,14 +179,16 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB", SAFE_MIN_FREE_MB)
|
||||||
async def warm_up_mp_pool():
    """Run one throwaway ``model.encode`` call ahead of the first real request.

    On a CUDA ``DEVICE`` the model is called as-is. Otherwise
    ``torch.cuda.device_count`` is temporarily replaced by a stub returning 0
    for the duration of the call, so the library sees no GPU.

    Any exception raised by the warm-up is logged as a warning and swallowed;
    this coroutine never raises because of a failed warm-up.
    """
    try:
        if DEVICE.startswith("cuda"):
            logger.info("Warm-up (GPU) → 建多进程池")
            model.encode(["warmup"], return_dense=True)
        else:
            logger.info("Warm-up (CPU) → 单进程初始化")
            # Temporarily make the library "believe" there is no GPU.
            orig_cnt = torch.cuda.device_count
            torch.cuda.device_count = lambda: 0
            try:
                # num_processes is deliberately not passed.
                model.encode(["warmup"], return_dense=True)
            finally:
                # Always undo the monkeypatch: without this, a failing encode
                # would leave torch.cuda.device_count stubbed to 0 for the
                # rest of the process, since the error is swallowed below.
                torch.cuda.device_count = orig_cnt
    except Exception as e:
        logger.warning("Warm-up failed: %s —— 首条请求时再退避", e)
|
||||||
|
|
||||||
|
|
@ -207,9 +209,13 @@ def _encode(texts: List[str]):
|
||||||
def _worker(t, q):
    """Encode ``t`` with the global model and report the outcome on ``q``.

    Puts ``("ok", out)`` on ``q`` when encoding succeeds, or
    ``("err", str(e))`` when any ``Exception`` is raised; nothing is
    propagated to the caller.

    Args:
        t: The texts handed to ``model.encode``.
        q: A queue-like object exposing ``put``.
    """
    try:
        if DEVICE.startswith("cuda"):
            # GPU path: run the model normally.
            out = model.encode(t, return_dense=True)
        else:
            # Temporarily hide the GPU so inference runs on the CPU in a
            # single process.
            orig_cnt = torch.cuda.device_count
            torch.cuda.device_count = lambda: 0
            try:
                # num_processes is deliberately not passed.
                out = model.encode(t, return_dense=True)
            finally:
                # Restore even when encode raises; otherwise the stubbed
                # device_count would leak past this call.
                torch.cuda.device_count = orig_cnt
        q.put(("ok", out))
    except Exception as e:
        q.put(("err", str(e)))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue