Files
DataClaw/dataclaw-voice/main.py
T
2026-05-13 16:43:53 +08:00

76 lines
2.6 KiB
Python

import os
import shutil
import ssl
import tempfile
import traceback
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import whisper
import imageio_ffmpeg
# Ensure whisper can execute the "ffmpeg" command: expose the binary bundled
# with imageio-ffmpeg under the plain name "ffmpeg" in a private directory and
# put that directory first on PATH.
_ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()
_ffmpeg_bin_dir = os.path.join(tempfile.gettempdir(), "dataclaw_ffmpeg_bin")
_ffmpeg_link = os.path.join(_ffmpeg_bin_dir, "ffmpeg")
os.makedirs(_ffmpeg_bin_dir, exist_ok=True)
# A symlink left by an earlier run can dangle if the bundled binary has moved.
# os.path.exists() follows links and reports False for it, so os.symlink()
# would then fail with FileExistsError and the copy fallback would write
# through the dead link. Drop the stale entry first.
if os.path.islink(_ffmpeg_link) and not os.path.exists(_ffmpeg_link):
    os.remove(_ffmpeg_link)
if not os.path.exists(_ffmpeg_link):
    try:
        os.symlink(_ffmpeg_exe, _ffmpeg_link)
    except OSError:
        # Symlinks may be unavailable (e.g. insufficient privileges); fall
        # back to a real copy and make sure it is executable.
        shutil.copy2(_ffmpeg_exe, _ffmpeg_link)
        os.chmod(_ffmpeg_link, 0o755)
os.environ["PATH"] = _ffmpeg_bin_dir + os.pathsep + os.environ.get("PATH", "")
# Disable HTTPS certificate verification to work around the
# UNEXPECTED_EOF_WHILE_READING error seen during the model download.
# NOTE(security): this replaces a private hook of the ssl module for the whole
# process, so any HTTPS request that relies on the default context skips
# certificate checks for as long as the override is in effect.
ssl._create_default_https_context = ssl._create_unverified_context
# The ASGI application; the route handlers in this module register on it.
app = FastAPI(title="Whisper Transcription Service")

# Fully permissive CORS so the frontend can call the API from any origin.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive -- confirm whether credentialed requests are actually needed.
_cors_options = {
    "allow_origins": ["*"],  # any origin
    "allow_credentials": True,
    "allow_methods": ["*"],  # any HTTP method
    "allow_headers": ["*"],  # any request header
}
app.add_middleware(CORSMiddleware, **_cors_options)
# Load the whisper model once, at import time, and share it across requests.
# "small" is a good balance between speed and accuracy.
print("Loading Whisper model (small)... This may take a moment.")
try:
    model = whisper.load_model("small")
finally:
    # Certificate verification is switched off earlier in this module only so
    # the model download can succeed. Put the standard verifying context
    # factory back afterwards so later HTTPS traffic is checked again.
    ssl._create_default_https_context = ssl.create_default_context
print("Model loaded successfully.")
@app.get("/health")
async def health():
    """Liveness probe: report a fixed ``{"status": "ok"}`` payload."""
    status_payload = {"status": "ok"}
    return status_payload
@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio clip with the module-level Whisper model.

    The upload is written to a temporary file because ``model.transcribe`` is
    handed a filesystem path. The temporary file is removed before returning,
    whether or not transcription succeeded.

    Args:
        file: The uploaded audio. It is stored with a ``.webm`` suffix
            regardless of its original filename.

    Returns:
        ``{"text": <transcript>}`` on success, or a ``JSONResponse`` with
        status 500 and ``{"error": <message>}`` if saving or transcribing
        the audio raises.
    """
    content = await file.read()
    tmp_path = None
    try:
        # delete=False: the file must outlive the ``with`` block so it can be
        # reopened by path. Creating it inside the ``try`` guarantees the
        # ``finally`` below cleans it up even if the write fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
            tmp_path = tmp.name
            tmp.write(content)
        # Force the language to Chinese for better accuracy on Chinese speech.
        result = model.transcribe(tmp_path, language="zh", task="transcribe")
        return {"text": result.get("text", "")}
    except Exception as e:
        # Service boundary: log the full traceback and report the failure as
        # JSON instead of letting the exception escape.
        print(f"Error during transcription: {e}")
        print(traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Clean up the temporary file. EAFP avoids the exists()/remove() race.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass  # already gone; nothing to clean up
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when this file is run directly
    # as a script.
    import uvicorn

    # Listen on every network interface, port 8001.
    uvicorn.run(app, port=8001, host="0.0.0.0")