This commit is contained in:
parent
61962c074b
commit
0b75bf040e
19
app/main.py
19
app/main.py
|
|
@ -212,15 +212,8 @@ class EmbeddingRequest(BaseModel):
|
||||||
model: str = "text-embedding-bge-m3"
|
model: str = "text-embedding-bge-m3"
|
||||||
|
|
||||||
|
|
||||||
def _encode(texts: List[str]):
|
# ③ -------- _encode() 里 worker 调用 --------
|
||||||
"""
|
def _worker(t, q):
|
||||||
单次请求:
|
|
||||||
1. 子进程跑 GPU 推理;成功→返回
|
|
||||||
2. 若子进程 OOM / CUDA Error → 同一次请求 fallback 到 CPU
|
|
||||||
绝不改全局状态,其他并发请求不受影响
|
|
||||||
"""
|
|
||||||
# ③ -------- _encode() 里 worker 调用 --------
|
|
||||||
def _worker(t, q):
|
|
||||||
try:
|
try:
|
||||||
# out = model.encode(t, return_dense=True) # GPU or CPU 均安全
|
# out = model.encode(t, return_dense=True) # GPU or CPU 均安全
|
||||||
out = model.encode(t, return_dense=True, num_processes=1)
|
out = model.encode(t, return_dense=True, num_processes=1)
|
||||||
|
|
@ -228,6 +221,14 @@ def _encode(texts: List[str]):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
q.put(("err", str(e)))
|
q.put(("err", str(e)))
|
||||||
|
|
||||||
|
|
||||||
|
def _encode(texts: List[str]):
|
||||||
|
"""
|
||||||
|
单次请求:
|
||||||
|
1. 子进程跑 GPU 推理;成功→返回
|
||||||
|
2. 若子进程 OOM / CUDA Error → 同一次请求 fallback 到 CPU
|
||||||
|
绝不改全局状态,其他并发请求不受影响
|
||||||
|
"""
|
||||||
q = mp.Queue()
|
q = mp.Queue()
|
||||||
p = mp.Process(target=_worker, args=(texts, q))
|
p = mp.Process(target=_worker, args=(texts, q))
|
||||||
p.start()
|
p.start()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue