.

2025-06-27 19:32:25 +08:00 · 2025-06-27 19:32:25 +08:00 · a10e9d854d
parent 739fe57d9d
commit a10e9d854d
3 changed files with 60 additions and 1 deletions
--- a/9
+++ b/9
@ -0,0 +1,9 @@
 # Base image: official CPython 3.10 image.
 FROM python:3.10
 # Install ffmpeg in a single layer. apt-get is used instead of apt because
 # apt's command-line interface is not meant for scripts; recommended extras
 # are skipped and the package lists are removed to keep the layer small.
 RUN apt-get update \
     && apt-get install -y --no-install-recommends ffmpeg \
     && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 # Copy only the dependency manifest first so the pip layer stays cached
 # when only application code changes.
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Now copy the rest of the build context (application code).
 COPY . .
 # Serve the `app` object from main.py with uvicorn on all interfaces, port 8000.
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/main.py
+++ b/main.py
@ -0,0 +1,46 @@
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Header
 from pydantic import BaseModel
 from faster_whisper import WhisperModel
 import os
 # ASGI application instance; the endpoint below is registered on it via
 # the @app.post decorator.
 app = FastAPI()
 # Supported models: maps the model name accepted in the request's `model`
 # form field to the size string handed to WhisperModel by get_model().
 SUPPORTED_MODELS = {
    "FunAudioLLM/SenseVoiceSmall": "small",
    "FunAudioLLM/SenseVoiceMedium": "medium",
    "FunAudioLLM/SenseVoiceLarge": "large"
 }
 # Model cache (kept resident in memory): request model name -> loaded
 # WhisperModel instance, filled lazily by get_model().
 MODEL_CACHE = {}
 def get_model(model_name: str):
     """Return the WhisperModel for *model_name*, loading it on first use.

     Args:
         model_name: A key of SUPPORTED_MODELS.

     Returns:
         The model instance stored in MODEL_CACHE under *model_name*.

     Raises:
         HTTPException: 400 when *model_name* is not a supported model.
     """
     # Every value in SUPPORTED_MODELS is a non-empty string, so None
     # unambiguously means "unknown model name".
     size = SUPPORTED_MODELS.get(model_name)
     if size is None:
         raise HTTPException(status_code=400, detail="Unsupported model")

     loaded = MODEL_CACHE.get(model_name)
     if loaded is None:
         # First request for this model: build it once and keep it cached.
         loaded = WhisperModel(size, compute_type="int8")
         MODEL_CACHE[model_name] = loaded
     return loaded
@app.post("/audio/transcriptions")
async def transcribe_audio(
    file: UploadFile = File(...),
    model: str = Form(...),
    authorization: str = Header(None),
):
    """Transcribe an uploaded audio file and return the recognised text.

    Args:
        file: The uploaded audio file (multipart form field).
        model: Name of the model to use; must be a key of SUPPORTED_MODELS.
        authorization: Value of the ``Authorization`` request header.

    Returns:
        ``{"text": <transcript>}`` where the transcript is the text of all
        segments joined with single spaces.

    Raises:
        HTTPException: 401 when the Authorization header is missing or does
            not start with ``"Bearer "``; 400 (from get_model) when the
            model name is not supported.
    """
    import tempfile

    # NOTE(review): only the "Bearer " prefix is checked; the token itself is
    # never validated, so any non-empty bearer value is accepted.
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Unauthorized")

    # Resolve the model before touching the disk so an unsupported model name
    # is rejected without leaving a temporary file behind.
    model_runner = get_model(model)

    # The filename is client-controlled and may be absent: keep only a plain
    # alphanumeric extension and never use the rest of the name in a path.
    suffix = os.path.splitext(file.filename or "")[1]
    if not suffix[1:].isalnum():
        suffix = ""

    # Use a unique temporary file per request so concurrent uploads cannot
    # overwrite or delete each other's audio.
    fd, audio_path = tempfile.mkstemp(prefix="temp_audio_", suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(await file.read())

        # NOTE(review): transcription is called synchronously from this
        # coroutine; consider offloading it if concurrent throughput matters.
        segments, _ = model_runner.transcribe(audio_path)
        # Join inside the try block so the file still exists while the
        # segments are consumed (presumably produced lazily - confirm against
        # the faster-whisper documentation).
        text = " ".join(seg.text for seg in segments)
    finally:
        # Always remove the temporary file, including when transcription fails.
        os.remove(audio_path)
    return {"text": text}
--- a/requirements.txt
+++ b/requirements.txt
@ -3,4 +3,8 @@ huggingface_hub>=0.13
 tokenizers>=0.13,<1
 onnxruntime>=1.14,<2 
 av>=11
-tqdm
+tqdm
 fastapi
 uvicorn
 python-multipart
 faster-whisper