From a10e9d854ddfc5ebff9d794ebc70f98076bb7ea5 Mon Sep 17 00:00:00 2001
From: hailin <hailin@gdzx.xyz>
Date: Fri, 27 Jun 2025 19:32:25 +0800
Subject: [PATCH] Add FastAPI transcription endpoint, Dockerfile and service dependencies

---
 Dockerfile       |  9 +++++++++
 main.py          | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  6 +++++-
 3 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile
 create mode 100644 main.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d4de6d0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.10
+# Install ffmpeg with apt-get (stable CLI for scripts) and drop the package lists in the same layer.
+RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install dependencies before copying the sources so code-only changes reuse the cached pip layer.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..c5ecef5
--- /dev/null
+++ b/main.py
@@ -0,0 +1,46 @@
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Header
+from pydantic import BaseModel
+from faster_whisper import WhisperModel
+import os
+
+app = FastAPI()
+
+# Supported models: maps the model name accepted by the API to the value passed to WhisperModel.
+SUPPORTED_MODELS = {
+    "FunAudioLLM/SenseVoiceSmall": "small",
+    "FunAudioLLM/SenseVoiceMedium": "medium",
+    "FunAudioLLM/SenseVoiceLarge": "large"
+}
+
+MODEL_CACHE = {}  # API model name -> loaded WhisperModel; entries stay resident in memory
+
+def get_model(model_name: str):
+    """Return the WhisperModel for `model_name`, loading it on first use and caching it; HTTP 400 if unsupported."""
+    if model_name not in SUPPORTED_MODELS:
+        raise HTTPException(status_code=400, detail="Unsupported model")
+    if model_name in MODEL_CACHE:
+        return MODEL_CACHE[model_name]
+    return MODEL_CACHE.setdefault(model_name, WhisperModel(SUPPORTED_MODELS[model_name], compute_type="int8"))
+
+@app.post("/audio/transcriptions")
+async def transcribe_audio(
+    file: UploadFile = File(...),
+    model: str = Form(...),
+    authorization: str = Header(None)
+):
+    """Transcribe an uploaded audio file and return {"text": ...}; 401 unless a Bearer header is sent (token not checked)."""
+    if not authorization or not authorization.startswith("Bearer "):
+        raise HTTPException(status_code=401, detail="Unauthorized")
+    # Resolve the model before writing anything so an unsupported model (400) cannot leave a temp file behind.
+    model_runner = get_model(model)
+    # Unique per-request name; only the extension of the client's filename is kept (filename may be missing or a path).
+    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
+    audio_path = f"temp_audio_{os.urandom(8).hex()}{suffix}"
+    try:
+        with open(audio_path, "wb") as f:
+            f.write(await file.read())
+        segments, _ = model_runner.transcribe(audio_path)
+        return {"text": " ".join(seg.text for seg in segments)}
+    finally:
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
diff --git a/requirements.txt b/requirements.txt
index 1b61b2c..2c06c92 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,8 @@ huggingface_hub>=0.13
 tokenizers>=0.13,<1
 onnxruntime>=1.14,<2 
 av>=11
-tqdm
\ No newline at end of file
+tqdm
+fastapi
+uvicorn
+python-multipart
+faster-whisper
\ No newline at end of file