Update 2026-05-13 16:43:53

2026-05-13 16:43:53 +08:00
parent 6af5c584f4
commit afd7c5fe85
490 changed files with 850 additions and 922 deletions
@@ -0,0 +1,75 @@
+import os
+import shutil
+import ssl
+import tempfile
+import traceback
+from fastapi import FastAPI, File, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+import whisper
+import imageio_ffmpeg
+
+# Ensure whisper can execute "ffmpeg" command
+_ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()
+_ffmpeg_bin_dir = os.path.join(tempfile.gettempdir(), "dataclaw_ffmpeg_bin")
+_ffmpeg_link = os.path.join(_ffmpeg_bin_dir, "ffmpeg")
+os.makedirs(_ffmpeg_bin_dir, exist_ok=True)
+if not os.path.exists(_ffmpeg_link):
+    try:
+        os.symlink(_ffmpeg_exe, _ffmpeg_link)
+    except OSError:
+        shutil.copy2(_ffmpeg_exe, _ffmpeg_link)
+        os.chmod(_ffmpeg_link, 0o755)
+os.environ["PATH"] = _ffmpeg_bin_dir + os.pathsep + os.environ.get("PATH", "")
+
+# Disable SSL verification temporarily to fix UNEXPECTED_EOF_WHILE_READING error during model download
+ssl._create_default_https_context = ssl._create_unverified_context
+
+app = FastAPI(title="Whisper Transcription Service")
+
+# Allow CORS for frontend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allows all origins
+    allow_credentials=True,
+    allow_methods=["*"],  # Allows all methods
+    allow_headers=["*"],  # Allows all headers
+)
+
+# Load the whisper model globally. "small" is a good balance between speed and accuracy.
+print("Loading Whisper model (small)... This may take a moment.")
+model = whisper.load_model("small")
+print("Model loaded successfully.")
+
+@app.get("/health")
+async def health():
+    return {"status": "ok"}
+
+@app.post("/transcribe")
+async def transcribe_audio(file: UploadFile = File(...)):
+    # Save the uploaded file to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
+        content = await file.read()
+        tmp.write(content)
+        tmp_path = tmp.name
+
+    try:
+        # Convert webm to wav since Whisper's internal ffmpeg dependency can be problematic
+        # We will use an alternative approach or just pass the webm if it works natively
+        
+        # Transcribe using whisper
+        # Forcing language to Chinese for better accuracy on Chinese speech
+        result = model.transcribe(tmp_path, language="zh", task="transcribe")
+        return {"text": result.get("text", "")}
+    except Exception as e:
+        print(f"Error during transcription: {e}")
+        print(traceback.format_exc())
+        return JSONResponse(status_code=500, content={"error": str(e)})
+    finally:
+        # Clean up the temporary file
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8001)