From a10e9d854ddfc5ebff9d794ebc70f98076bb7ea5 Mon Sep 17 00:00:00 2001
From: hailin
Date: Fri, 27 Jun 2025 19:32:25 +0800
Subject: [PATCH] .

---
 Dockerfile       | 11 +++++++++++
 main.py          | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  6 +++++-
 3 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile
 create mode 100644 main.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d4de6d0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.10
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+COPY . .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..c5ecef5
--- /dev/null
+++ b/main.py
@@ -0,0 +1,79 @@
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Header
+from fastapi.concurrency import run_in_threadpool
+from pydantic import BaseModel
+from faster_whisper import WhisperModel
+import os
+import tempfile
+
+app = FastAPI()
+
+# Public model names accepted by the API, mapped to the faster-whisper
+# model size that is loaded for each of them.
+SUPPORTED_MODELS = {
+    "FunAudioLLM/SenseVoiceSmall": "small",
+    "FunAudioLLM/SenseVoiceMedium": "medium",
+    "FunAudioLLM/SenseVoiceLarge": "large"
+}
+
+# Model cache (kept resident in memory for the life of the process).
+MODEL_CACHE = {}
+
+def get_model(model_name: str):
+    """Return the WhisperModel for ``model_name``, loading it on first use.
+
+    Raises:
+        HTTPException: 400 if ``model_name`` is not in SUPPORTED_MODELS.
+    """
+    if model_name not in SUPPORTED_MODELS:
+        raise HTTPException(status_code=400, detail="Unsupported model")
+    if model_name not in MODEL_CACHE:
+        MODEL_CACHE[model_name] = WhisperModel(SUPPORTED_MODELS[model_name], compute_type="int8")
+    return MODEL_CACHE[model_name]
+
+def _transcribe_file(model_runner, audio_path: str) -> str:
+    """Transcribe the file at ``audio_path`` and return the joined text."""
+    segments, _ = model_runner.transcribe(audio_path)
+    # Strip each segment before joining so that any padding whitespace in a
+    # segment does not turn into doubled spaces in the result.
+    return " ".join(seg.text.strip() for seg in segments)
+
+@app.post("/audio/transcriptions")
+async def transcribe_audio(
+    file: UploadFile = File(...),
+    model: str = Form(...),
+    authorization: str = Header(None)
+):
+    """Transcribe an uploaded audio file and return ``{"text": ...}``.
+
+    Raises:
+        HTTPException: 401 if the Authorization header is missing or does
+            not start with "Bearer "; 400 if ``model`` is unsupported.
+    """
+    # NOTE(review): only the "Bearer " prefix is checked; the token itself
+    # is never validated.
+    if not authorization or not authorization.startswith("Bearer "):
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+    # Reject unsupported models before spending any I/O on the upload.
+    model_runner = get_model(model)
+
+    # Reuse the client-supplied extension only when it is plain alphanumerics,
+    # so the upload's filename can never influence where the file is written.
+    extension = os.path.splitext(file.filename or "")[1]
+    suffix = extension if extension[1:].isalnum() else ""
+
+    # One unique temp file per request: a fixed name would let concurrent
+    # uploads overwrite each other.
+    fd, audio_path = tempfile.mkstemp(prefix="temp_audio_", suffix=suffix)
+    try:
+        with os.fdopen(fd, "wb") as f:
+            f.write(await file.read())
+
+        # Inference is blocking; run it in a worker thread so the event loop
+        # keeps serving other requests.
+        text = await run_in_threadpool(_transcribe_file, model_runner, audio_path)
+    finally:
+        # Remove the temp file even if writing or transcription fails.
+        os.remove(audio_path)
+
+    return {"text": text}
diff --git a/requirements.txt b/requirements.txt
index 1b61b2c..2c06c92 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,8 @@ huggingface_hub>=0.13
 tokenizers>=0.13,<1
 onnxruntime>=1.14,<2
 av>=11
-tqdm
\ No newline at end of file
+tqdm
+fastapi
+uvicorn
+python-multipart
+faster-whisper
\ No newline at end of file