This commit is contained in:
parent
739fe57d9d
commit
a10e9d854d
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Image for the transcription API service.
FROM python:3.10

# Install the ffmpeg system package. apt-get is used instead of apt because
# apt's command-line interface is not meant for scripts, and the package lists
# are removed in the same layer so they do not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the dependency manifest first so the pip layer below is reused
# from cache unless requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application sources go in last; editing them no longer reinstalls dependencies.
COPY . .

# Serve the ASGI app on all interfaces, port 8000.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Header
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
import os
|
||||||
|
|
||||||
|
# ASGI application object that the route decorators below register against.
# NOTE(review): presumably the `main:app` target of the uvicorn command in the
# Dockerfile — confirm this module is main.py.
app = FastAPI()
|
||||||
|
|
||||||
|
# Supported models: maps the model identifier accepted by the API to the
# model size string that get_model() passes to WhisperModel.
SUPPORTED_MODELS: dict[str, str] = {
    "FunAudioLLM/SenseVoiceSmall": "small",
    "FunAudioLLM/SenseVoiceMedium": "medium",
    "FunAudioLLM/SenseVoiceLarge": "large",
}

# Model cache (kept resident in memory): get_model() stores each loaded model
# here under its API identifier so it is constructed at most once.
MODEL_CACHE: "dict[str, WhisperModel]" = {}
|
||||||
|
|
||||||
|
def get_model(model_name: str):
    """Return the loaded model for *model_name*, constructing it on first use.

    Args:
        model_name: One of the keys of ``SUPPORTED_MODELS``.

    Returns:
        The ``WhisperModel`` instance stored in ``MODEL_CACHE`` for this name.

    Raises:
        HTTPException: With status 400 when *model_name* is not a key of
            ``SUPPORTED_MODELS``.
    """
    size = SUPPORTED_MODELS.get(model_name)
    if size is None:
        raise HTTPException(status_code=400, detail="Unsupported model")

    runner = MODEL_CACHE.get(model_name)
    if runner is None:
        # First request for this model: build it once and keep it for reuse.
        runner = WhisperModel(size, compute_type="int8")
        MODEL_CACHE[model_name] = runner
    return runner
|
||||||
|
|
||||||
|
@app.post("/audio/transcriptions")
async def transcribe_audio(
    file: UploadFile = File(...),
    model: str = Form(...),
    authorization: str = Header(None),
):
    """Transcribe an uploaded audio file and return the recognized text.

    Args:
        file: The uploaded audio file (multipart form field).
        model: Model identifier; must be a key of ``SUPPORTED_MODELS``.
        authorization: Value of the ``Authorization`` request header.

    Returns:
        A dict ``{"text": ...}`` holding the segment texts joined by spaces.

    Raises:
        HTTPException: 401 when the ``Authorization`` header is missing or does
            not start with ``"Bearer "``; 400 (from ``get_model``) when *model*
            is not supported.
    """
    import tempfile

    # NOTE(review): only the header's shape is checked here; the token value
    # itself is never compared against anything.
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Unauthorized")

    # Resolve the model before touching the disk, so a rejected model name
    # cannot leave a temporary file behind.
    model_runner = get_model(model)

    # Keep the client's extension only when it is plain alphanumerics: the
    # filename is untrusted input and may also be missing entirely.
    _, ext = os.path.splitext(file.filename or "")
    suffix = ext if ext[1:].isalnum() else ""

    # A unique temporary file per request; a fixed name would let concurrent
    # uploads overwrite each other's audio.
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    audio_path = tmp.name
    try:
        with tmp:
            tmp.write(await file.read())

        # Run inference. The segments are consumed inside the try block so the
        # file still exists while they are being produced.
        segments, _ = model_runner.transcribe(audio_path)
        text = " ".join(seg.text for seg in segments)
    finally:
        # Always clean up, including when writing or transcription fails.
        os.remove(audio_path)

    return {"text": text}
|
||||||
|
|
@ -3,4 +3,8 @@ huggingface_hub>=0.13
|
||||||
tokenizers>=0.13,<1
|
tokenizers>=0.13,<1
|
||||||
onnxruntime>=1.14,<2
|
onnxruntime>=1.14,<2
|
||||||
av>=11
|
av>=11
|
||||||
tqdm
|
tqdm
|
||||||
|
fastapi
|
||||||
|
uvicorn
|
||||||
|
python-multipart
|
||||||
|
faster-whisper
|
||||||
Loading…
Reference in New Issue