parent 1bf58c86e1
commit f05b0b2d50
@@ -99,7 +99,7 @@ def _choose_precision_by_idx(idx: int) -> str:
             return "fp16"
         return "fp32"
     except Exception:
-        return "fp16" if torch.cuda.is_available() else "fp32"
+        return "fp32"
 
 def load_model(device: str):
     """
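Note on this hunk: the exception fallback previously returned "fp16" whenever CUDA was available, even though reaching the except branch means the device probe itself failed; after the change a failed probe always falls back to the safe "fp32" default. A minimal sketch of how the whole helper might read after the change (the capability probe is an assumption; the diff shows only the returns and the fallback):

import torch

def _choose_precision_by_idx(idx: int) -> str:
    """Pick an inference dtype for GPU index `idx`.

    Sketch under assumptions: the diff shows only the return statements
    and the exception fallback; the capability probe is illustrative.
    """
    try:
        major, _minor = torch.cuda.get_device_capability(idx)
        if major >= 7:  # assumed cutoff: Volta and newer have fast fp16 paths
            return "fp16"
        return "fp32"
    except Exception:
        # Changed in this commit: a failed probe no longer guesses fp16
        # on CUDA hosts; it always falls back to fp32.
        return "fp32"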
@@ -225,6 +225,11 @@ elif args.device is not None:
 else:
     model, PRECISION, DEVICE = auto_select_and_load()
 
+
+# --- global tokenizer (need this or you'll get "name 'tokenizer' is not defined") ---
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
+
+
 # -----------------------------------------------------------------------------#
 # FastAPI
 # -----------------------------------------------------------------------------#
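Note on this hunk: handlers later in the file reference `tokenizer` at module scope, so without this global the first request that touches it raises NameError: name 'tokenizer' is not defined; `local_files_only=True` also keeps startup from reaching out to the Hugging Face Hub. A minimal sketch of the kind of endpoint that depends on the global (the route, request model, and MODEL_PATH value are hypothetical, not part of the diff):

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer

MODEL_PATH = "/models/bge-m3"  # hypothetical path; the real value is set earlier in the file
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)

app = FastAPI()

class CountRequest(BaseModel):  # hypothetical request model
    text: str

@app.post("/token_count")  # hypothetical route; the diff only adds the global
def token_count(req: CountRequest):
    # Resolves the module-level `tokenizer` added in this hunk; before the
    # change this line raised NameError: name 'tokenizer' is not defined.
    ids = tokenizer(req.text, add_special_tokens=False)["input_ids"]
    return {"token_count": len(ids)}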
@@ -236,7 +241,7 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB, BATCH_SIZE = %d", SAFE_MIN_FREE_MB,
 def _warmup():
     global _READY
     try:
-        # Try warming up with batch_size; fall back if it is not supported
-        try:
-            model.encode(["warmup sentence"], return_dense=True, batch_size=BATCH_SIZE)
-        except TypeError:
+        with torch.inference_mode():
+            try:
+                model.encode(["warmup sentence"], return_dense=True, batch_size=BATCH_SIZE)
+            except TypeError:
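Note on this hunk: wrapping the warm-up call in torch.inference_mode() disables autograd tracking for the throwaway encode, so warm-up allocates no gradient state, while the inner try/except keeps compatibility with encode() signatures that do not accept batch_size. The hunk cuts off at except TypeError:, so the fallback body and error handling below are an assumption about its shape, not the committed code:

def _warmup():
    global _READY
    try:
        with torch.inference_mode():
            try:
                # Preferred path: warm up with the configured batch size.
                model.encode(["warmup sentence"], return_dense=True, batch_size=BATCH_SIZE)
            except TypeError:
                # Assumed fallback: retry without batch_size for older
                # encode() signatures (this body is not shown in the diff).
                model.encode(["warmup sentence"], return_dense=True)
        _READY = True
    except Exception:
        # Assumed handling: log and stay not-ready rather than crash the server.
        logger.exception("warmup failed")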