diff --git a/app/main.py b/app/main.py index 2fd092f..0ed4223 100644 --- a/app/main.py +++ b/app/main.py @@ -25,9 +25,9 @@ from FlagEmbedding import BGEM3FlagModel # Config # -----------------------------------------------------------------------------# MODEL_PATH = "model/bge-m3" # 按需改成你的权重路径 -MODEL_VRAM_MB = int(os.getenv("MODEL_VRAM_MB", "8000")) # bge-m3-large fp32 ≈ 8 GiB -POST_LOAD_GAP_MB = 192 -SAFE_MIN_FREE_MB = MODEL_VRAM_MB + POST_LOAD_GAP_MB # == 8192 MB +MODEL_VRAM_MB = int(os.getenv("MODEL_VRAM_MB", "4800")) # bge-m3-large: fp16 ≈ 2.4 GiB, fp32 ≈ 4.8 GiB +POST_LOAD_GAP_MB = 200 +SAFE_MIN_FREE_MB = MODEL_VRAM_MB + POST_LOAD_GAP_MB # == 5000 MB with the default MODEL_VRAM_MB # 请求批次与单条最大长度上限(防御异常大 payload) MAX_BATCH = int(os.getenv("MAX_BATCH", "1024"))