.

2025-08-10 23:24:13 +08:00 · 2025-08-10 23:24:13 +08:00 · f05b0b2d50
parent 1bf58c86e1
commit f05b0b2d50
1 changed file with 11 additions and 6 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -99,7 +99,7 @@ def _choose_precision_by_idx(idx: int) -> str:
            return "fp16"
        return "fp32"
    except Exception:
-        return "fp16" if torch.cuda.is_available() else "fp32"
+        return "fp32"
 def load_model(device: str):
    """
@@ -225,6 +225,11 @@ elif args.device is not None:
 else:
    model, PRECISION, DEVICE = auto_select_and_load()
 # --- global tokenizer (need this or you'll get "name 'tokenizer' is not defined") ---
 tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
 # -----------------------------------------------------------------------------#
 # FastAPI
 # -----------------------------------------------------------------------------#
@@ -236,7 +241,7 @@ logger.info("Using SAFE_MIN_FREE_MB = %d MB, BATCH_SIZE = %d", SAFE_MIN_FREE_MB,
 def _warmup():
    global _READY
    try:
-        # 尝试用 batch_size 预热；不支持就回退
+        with torch.inference_mode():
            try:
                model.encode(["warmup sentence"], return_dense=True, batch_size=BATCH_SIZE)
            except TypeError: