This commit is contained in:
hailin 2025-08-10 23:24:13 +08:00
parent 1bf58c86e1
commit f05b0b2d50
1 changed file with 11 additions and 6 deletions

View File

@@ -99,7 +99,7 @@ def _choose_precision_by_idx(idx: int) -> str:
return "fp16" return "fp16"
return "fp32" return "fp32"
except Exception: except Exception:
return "fp16" if torch.cuda.is_available() else "fp32" return "fp32"
def load_model(device: str): def load_model(device: str):
""" """
@@ -225,6 +225,11 @@ elif args.device is not None:
else: else:
model, PRECISION, DEVICE = auto_select_and_load() model, PRECISION, DEVICE = auto_select_and_load()
# --- global tokenizer (need this or you'll get "name 'tokenizer' is not defined") ---
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
# -----------------------------------------------------------------------------# # -----------------------------------------------------------------------------#
# FastAPI # FastAPI
# -----------------------------------------------------------------------------# # -----------------------------------------------------------------------------#
@@ -236,7 +241,7 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB, BATCH_SIZE = %d", SAFE_MIN_FREE_MB,
def _warmup(): def _warmup():
global _READY global _READY
try: try:
# 尝试用 batch_size 预热;不支持就回退 with torch.inference_mode():
try: try:
model.encode(["warmup sentence"], return_dense=True, batch_size=BATCH_SIZE) model.encode(["warmup sentence"], return_dense=True, batch_size=BATCH_SIZE)
except TypeError: except TypeError: