This commit is contained in:
hailin 2025-06-27 19:32:25 +08:00
parent 739fe57d9d
commit a10e9d854d
3 changed files with 60 additions and 1 deletions

9
Dockerfile Normal file
View File

@ -0,0 +1,9 @@
# Image for the FastAPI transcription service served by uvicorn on port 8000.
FROM python:3.10

# Install the system ffmpeg package.
# apt-get is used instead of apt because apt's CLI is not stable for scripts.
# Recommended extras are skipped and the package lists are removed in the same
# layer so they do not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the dependency manifest first so the pip layer stays cached when
# application source changes but requirements.txt does not.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the build context (application code).
COPY . .

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

46
main.py Normal file
View File

@ -0,0 +1,46 @@
import os
import tempfile

from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Header
from faster_whisper import WhisperModel
from pydantic import BaseModel
# Application instance; the route decorator further down registers its handler on it.
app = FastAPI()
# Supported models: maps the model name accepted in requests to the identifier
# that is passed to WhisperModel when the model is loaded.
# NOTE(review): the keys are SenseVoice names but the values are handed to
# faster-whisper -- presumably an API-compatibility alias; confirm intent.
SUPPORTED_MODELS: dict[str, str] = {
    "FunAudioLLM/SenseVoiceSmall": "small",
    "FunAudioLLM/SenseVoiceMedium": "medium",
    "FunAudioLLM/SenseVoiceLarge": "large"
}
# Model cache (kept resident in memory): request model name -> loaded
# WhisperModel instance, filled lazily by get_model.
MODEL_CACHE: dict[str, WhisperModel] = {}
def get_model(model_name: str):
    """Return the loaded model for *model_name*, loading it on first use.

    The name must be a key of ``SUPPORTED_MODELS``. The first request for a
    name constructs a ``WhisperModel`` with ``compute_type="int8"`` and stores
    it in ``MODEL_CACHE``; later requests reuse the stored instance.

    Raises:
        HTTPException: status 400 when *model_name* is not supported.
    """
    model_size = SUPPORTED_MODELS.get(model_name)
    if model_size is None:
        raise HTTPException(status_code=400, detail="Unsupported model")

    loaded = MODEL_CACHE.get(model_name)
    if loaded is None:
        loaded = WhisperModel(model_size, compute_type="int8")
        MODEL_CACHE[model_name] = loaded
    return loaded
@app.post("/audio/transcriptions")
async def transcribe_audio(
    file: UploadFile = File(...),
    model: str = Form(...),
    authorization: str = Header(None),
):
    """Transcribe an uploaded audio file and return the recognised text.

    Args:
        file: The uploaded audio file (multipart form field).
        model: Requested model name; must be a key of ``SUPPORTED_MODELS``.
        authorization: Value of the ``Authorization`` header.

    Returns:
        ``{"text": <transcript>}`` where the transcript is the text of all
        segments joined with single spaces.

    Raises:
        HTTPException: 401 when the header is missing or does not start with
            ``"Bearer "``; 400 (from ``get_model``) for an unsupported model.
    """
    # NOTE(review): only the "Bearer " prefix is checked; the token itself is
    # never validated, so any bearer value is accepted.
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Unauthorized")

    # Resolve the model before touching the disk so an unsupported model is
    # rejected without leaving a temporary file behind.
    model_runner = get_model(model)

    # Keep only the extension of the client-supplied name (it may be missing),
    # never the name itself, so untrusted input cannot influence the path.
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
    payload = await file.read()

    # A unique temporary file per request: a fixed name would let concurrent
    # uploads overwrite each other's audio.
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    audio_path = tmp.name
    try:
        with tmp:
            tmp.write(payload)
        # Run inference; the segments are consumed inside the try block so the
        # file still exists while they are produced.
        segments, _ = model_runner.transcribe(audio_path)
        text = " ".join(seg.text for seg in segments)
    finally:
        # Always remove the temporary file, even when transcription fails.
        os.remove(audio_path)
    return {"text": text}

View File

@ -4,3 +4,7 @@ tokenizers>=0.13,<1
onnxruntime>=1.14,<2
av>=11
tqdm
fastapi
uvicorn
python-multipart
faster-whisper