faster-whisper/main.py

from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Header
from pydantic import BaseModel
from faster_whisper import WhisperModel
import os

app = FastAPI()

# 支持的模型
SUPPORTED_MODELS = {
    "FunAudioLLM/SenseVoiceSmall": "small",
    "FunAudioLLM/SenseVoiceMedium": "medium",
    "FunAudioLLM/SenseVoiceLarge": "large"
}

# 模型缓存（常驻内存）
MODEL_CACHE = {}

def get_model(model_name: str):
    if model_name not in SUPPORTED_MODELS:
        raise HTTPException(status_code=400, detail="Unsupported model")
    if model_name not in MODEL_CACHE:
        MODEL_CACHE[model_name] = WhisperModel(SUPPORTED_MODELS[model_name], compute_type="int8")
    return MODEL_CACHE[model_name]

@app.post("/audio/transcriptions")
async def transcribe_audio(
    file: UploadFile = File(...),
    model: str = Form(...),
    authorization: str = Header(None)
):
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Unauthorized")

    # 保存音频临时文件
    audio_path = "temp_audio." + file.filename.split(".")[-1]
    with open(audio_path, "wb") as f:
        f.write(await file.read())

    # 模型推理
    model_runner = get_model(model)
    segments, _ = model_runner.transcribe(audio_path)
    text = " ".join([seg.text for seg in segments])

    os.remove(audio_path)

    return {"text": text}