diff --git a/.gitignore b/.gitignore index e6a16b0..027e6b4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode nanobot-0.1.4.post4 data _research diff --git a/frontend/package-lock.json b/frontend/package-lock.json index de38eef..5dbcf9c 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -761,6 +761,16 @@ "@noble/ciphers": "^1.0.0" } }, + "node_modules/@emnapi/runtime": { + "version": "1.9.1", + "resolved": "https://registry.npmmirror.com/@emnapi/runtime/-/runtime-1.9.1.tgz", + "integrity": "sha512-VYi5+ZVLhpgK4hQ0TAjiQiZ6ol0oe4mBx7mVv7IflsiEp0OWoVsp/+f9Vc1hOhE0TtkORVrI1GvzyreqpgWtkA==", + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.21.5", "resolved": "https://registry.npmmirror.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", diff --git a/frontend/src/components/ChatInterface.tsx b/frontend/src/components/ChatInterface.tsx index f9cdf1d..951e60a 100644 --- a/frontend/src/components/ChatInterface.tsx +++ b/frontend/src/components/ChatInterface.tsx @@ -1,6 +1,6 @@ import { useState, useRef, useEffect } from "react"; import { ScrollArea } from "@/components/ui/scroll-area"; -import { User, Loader2, ArrowUp, ChevronDown, Check, Square, Plus, Database, Wand2, Zap, CheckCircle2, Table, XCircle, Settings, ExternalLink, FileText, Download, Eye, Copy } from "lucide-react"; +import { User, Loader2, ArrowUp, ChevronDown, Check, Square, Plus, Database, Wand2, Zap, CheckCircle2, Table, XCircle, Settings, ExternalLink, FileText, Download, Eye, Copy, Mic, X } from "lucide-react"; import { api } from "@/lib/api"; import { type ChartSpec } from "@/store/visualizationStore"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; @@ -14,7 +14,10 @@ import { useTranslation } from "react-i18next"; import { InlineVisualizationCard } from "./InlineVisualizationCard"; import { useProjectStore } from "@/store/projectStore"; import 
{ SlashCommandMenu } from "./SlashCommandMenu";
-import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
+import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogFooter } from "@/components/ui/dialog";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
 
 interface Message {
   id: string;
@@ -302,6 +305,210 @@ export function ChatInterface() {
   const [, setIsUploading] = useState(false);
   const fileInputRef = useRef(null);
 
+  // Speech Recognition State
+  // isRecording: microphone capture is in progress.
+  // isTranscribing: a finished recording is being sent to the Whisper service.
+  // recordingLevel: loudness of the live input, scaled into [0, 1] for the waveform bars.
+  const [isRecording, setIsRecording] = useState(false);
+  const [isTranscribing, setIsTranscribing] = useState(false);
+  const [recordingLevel, setRecordingLevel] = useState(0);
+  // Refs carry explicit type arguments so that assigning the recorder, chunks,
+  // audio context and frame id type-checks under strict mode.
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const audioChunksRef = useRef<Blob[]>([]);
+  // Checked in onstop: false means the recording was cancelled and must be discarded.
+  const shouldTranscribeRef = useRef(true);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const audioAnimationRef = useRef<number | null>(null);
+
+  // Local storage for whisper URL
+  const [whisperUrl, setWhisperUrl] = useState<string>(() => {
+    try {
+      return localStorage.getItem("whisper_url") || "http://localhost:8001";
+    } catch (err) {
+      // Storage access can throw (e.g. blocked by browser privacy settings); use the default.
+      console.warn("Unable to read whisper_url from localStorage:", err);
+      return "http://localhost:8001";
+    }
+  });
+  const [isVoiceSettingsOpen, setIsVoiceSettingsOpen] = useState(false);
+
+  /** Stores the Whisper service base URL (state + localStorage) and closes the settings dialog. */
+  const handleSaveWhisperUrl = (url: string) => {
+    const trimmedUrl = url.trim();
+    setWhisperUrl(trimmedUrl);
+    try {
+      localStorage.setItem("whisper_url", trimmedUrl);
+    } catch (err) {
+      // Persisting is best-effort; the URL still applies for the current session.
+      console.warn("Unable to persist whisper_url:", err);
+    }
+    setIsVoiceSettingsOpen(false);
+  };
+
+  /** Stops the level-meter animation loop, closes its AudioContext and resets the level to 0. */
+  const stopAudioMeter = () => {
+    if (audioAnimationRef.current !== null) {
+      cancelAnimationFrame(audioAnimationRef.current);
+      audioAnimationRef.current = null;
+    }
+    if (audioContextRef.current) {
+      void audioContextRef.current.close();
+      audioContextRef.current = null;
+    }
+    setRecordingLevel(0);
+  };
+
+  /** Starts a requestAnimationFrame loop that publishes the RMS level of `stream` to recordingLevel. */
+  const startAudioMeter = (stream: MediaStream) => {
+    const audioContext = new AudioContext();
+    const source = audioContext.createMediaStreamSource(stream);
+    const analyser = audioContext.createAnalyser();
+    analyser.fftSize = 1024;
+    source.connect(analyser);
+    audioContextRef.current = audioContext;
+    const dataArray = new Uint8Array(analyser.frequencyBinCount);
+
+    const tick = () => {
+      analyser.getByteTimeDomainData(dataArray);
+      let sum = 0;
+      for (const sample of dataArray) {
+        // Samples are bytes centred on 128; map them to [-1, 1).
+        const normalized = (sample - 128) / 128;
+        sum += normalized * normalized;
+      }
+      const rms = Math.sqrt(sum / dataArray.length);
+      // Gain of 7, clamped to 1, so the published level stays within [0, 1].
+      setRecordingLevel(Math.min(1, rms * 7));
+      audioAnimationRef.current = requestAnimationFrame(tick);
+    };
+
+    tick();
+  };
+
+  /**
+   * Requests microphone access and starts recording. When the recorder stops, the
+   * captured audio is posted to `${whisperUrl}/transcribe` and the returned text is
+   * appended to the input, unless the recording was cancelled.
+   */
+  const startRecording = async () => {
+    // Ignore repeated starts while a recorder is still active.
+    const activeRecorder = mediaRecorderRef.current;
+    if (activeRecorder && activeRecorder.state !== "inactive") {
+      return;
+    }
+
+    let stream: MediaStream | null = null;
+    try {
+      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      const mediaRecorder = new MediaRecorder(stream);
+      mediaRecorderRef.current = mediaRecorder;
+      audioChunksRef.current = [];
+      shouldTranscribeRef.current = true;
+      startAudioMeter(stream);
+
+      mediaRecorder.ondataavailable = (e) => {
+        if (e.data.size > 0) {
+          audioChunksRef.current.push(e.data);
+        }
+      };
+
+      mediaRecorder.onstop = async () => {
+        stopAudioMeter();
+        if (!shouldTranscribeRef.current) {
+          // Cancelled: drop the audio and re-arm the flag for the next recording.
+          shouldTranscribeRef.current = true;
+          return;
+        }
+        setIsTranscribing(true);
+        try {
+          // Label the blob with the MIME type the recorder reports; fall back to
+          // 'audio/webm' only when it reports none.
+          const audioBlob = new Blob(audioChunksRef.current, {
+            type: mediaRecorder.mimeType || 'audio/webm',
+          });
+          const formData = new FormData();
+          formData.append("file", audioBlob, "audio.webm");
+
+          const baseUrl = whisperUrl || "http://localhost:8001";
+          // Strip every trailing slash so the path never contains "//transcribe".
+          const response = await fetch(`${baseUrl.replace(/\/+$/, '')}/transcribe`, {
+            method: "POST",
+            body: formData,
+          });
+
+          if (!response.ok) {
+            throw new Error(`HTTP error! status: ${response.status}`);
+          }
+
+          const output = (await response.json()) as { text?: unknown } | null;
+          // Only append a non-empty string; an empty result must not add a stray space.
+          const text = typeof output?.text === "string" ? output.text.trim() : "";
+          if (text) {
+            setInput((prev) => prev + (prev ? " " : "") + text);
+          }
+        } catch (err) {
+          console.error("Transcription error:", err);
+        } finally {
+          setIsTranscribing(false);
+        }
+      };
+
+      mediaRecorder.start();
+      setIsRecording(true);
+    } catch (err) {
+      console.error("Microphone access denied:", err);
+      // Setup can fail after the stream was granted; release the microphone and meter.
+      stopAudioMeter();
+      if (stream) {
+        stream.getTracks().forEach((track) => track.stop());
+      }
+      mediaRecorderRef.current = null;
+    }
+  };
+
+  /** Stops the active recorder (which triggers onstop) and releases the microphone. */
+  const stopRecording = () => {
+    const recorder = mediaRecorderRef.current;
+    if (!recorder) {
+      return;
+    }
+    // stop() throws on an inactive recorder, so check the recorder's own state
+    // rather than the React flag.
+    if (recorder.state !== "inactive") {
+      recorder.stop();
+    }
+    setIsRecording(false);
+    recorder.stream.getTracks().forEach((track) => track.stop());
+  };
+
+  /** Ends the recording and sends it for transcription. */
+  const confirmRecording = () => {
+    shouldTranscribeRef.current = true;
+    stopRecording();
+  };
+
+  /** Ends the recording and discards the captured audio. */
+  const cancelRecording = () => {
+    shouldTranscribeRef.current = false;
+    stopRecording();
+  };
+
+  useEffect(() => {
+    return () => {
+      // On unmount, discard any in-flight recording instead of transcribing it.
+      shouldTranscribeRef.current = false;
+      stopAudioMeter();
+      const recorder = mediaRecorderRef.current;
+      if (recorder) {
+        if (recorder.state !== "inactive") {
+          recorder.stop();
+        }
+        recorder.stream.getTracks().forEach((track) => track.stop());
+      }
+    };
+  }, []);
+
   useEffect(() => {
     fetchModels();
   }, []);
@@ -1071,41 +1213,101 @@ export function ChatInterface() {
-
- setSlashQuery(null)}
- />
+ {isRecording ? (
+ <>
+
+
+
+
+ {Array.from({ length: 30 }).map((_, idx) => { + const dynamic = Math.abs(Math.sin(Date.now() / 180 + idx * 0.85)); + const height = Math.max(4, Math.round((4 + dynamic * 18) * (0.45 + recordingLevel))); + return ( + + ); + })} +
+
+
+
+ + +
+ + ) : ( + <> + + setSlashQuery(null)} + /> -
- -
+
+ + + +
+ + )}
@@ -1453,41 +1655,101 @@ export function ChatInterface() {
- - setSlashQuery(null)} - /> + {isRecording ? ( + <> +
+
+
+
+ {Array.from({ length: 30 }).map((_, idx) => { + const dynamic = Math.abs(Math.sin(Date.now() / 180 + idx * 0.85)); + const height = Math.max(4, Math.round((4 + dynamic * 18) * (0.45 + recordingLevel))); + return ( + + ); + })} +
+
+
+
+ + +
+ + ) : ( + <> + + setSlashQuery(null)} + /> -
- -
+
+ + + +
+ + )}
@@ -1534,6 +1796,34 @@ export function ChatInterface() {
+ + + + {t('voiceSettings', '语音输入配置')} + +
+
+ + setWhisperUrl(e.target.value)} + className="col-span-3" + placeholder="http://localhost:8001" + /> +
+

+ 请在此配置独立的 Whisper 语音识别服务地址。例如:http://localhost:8001 +

+
+ + + + +
+
); } diff --git a/whisper/.gitignore b/whisper/.gitignore new file mode 100644 index 0000000..524e270 --- /dev/null +++ b/whisper/.gitignore @@ -0,0 +1,210 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +data +*.db + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. 
For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ diff --git a/whisper/README.md b/whisper/README.md new file mode 100644 index 0000000..581d654 --- /dev/null +++ b/whisper/README.md @@ -0,0 +1,39 @@ +# Whisper Transcription Service + +This is a standalone HTTP service for transcribing audio files using the OpenAI Whisper model. + +## Prerequisites + +Make sure you have Python 3.9+ and `ffmpeg` installed on your system. + +To install `ffmpeg` on macOS: +```bash +brew install ffmpeg +``` + +## Setup & Run + +1. Create a virtual environment and install dependencies: +```bash +cd whisper +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +2. Start the server: +```bash +python main.py +``` +Or run with uvicorn directly: +```bash +uvicorn main:app --host 0.0.0.0 --port 8001 --reload +``` + +The service will run on `http://localhost:8001`. 
+
+## API Endpoint
+
+- `POST /transcribe`
+  - Body: `multipart/form-data` with a `file` field containing the audio blob.
+  - Returns: `{"text": "transcribed text..."}`
diff --git a/whisper/main.py b/whisper/main.py
new file mode 100644
index 0000000..6e527e9
--- /dev/null
+++ b/whisper/main.py
@@ -0,0 +1,110 @@
+import os
+import shutil
+import ssl
+import tempfile
+import threading
+import traceback
+from fastapi import FastAPI, File, UploadFile
+from fastapi.concurrency import run_in_threadpool
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+import whisper
+import imageio_ffmpeg
+
+# Ensure whisper can execute the "ffmpeg" command: expose the binary located by
+# imageio_ffmpeg under the name "ffmpeg" in a dedicated directory and put that
+# directory first on PATH.
+# NOTE(review): the directory has a fixed name under the shared temp dir; on
+# multi-user hosts a per-user location would be safer - confirm deployment.
+_ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()
+_ffmpeg_bin_dir = os.path.join(tempfile.gettempdir(), "dataclaw_ffmpeg_bin")
+_ffmpeg_link = os.path.join(_ffmpeg_bin_dir, "ffmpeg")
+os.makedirs(_ffmpeg_bin_dir, exist_ok=True)
+if os.path.islink(_ffmpeg_link) and not os.path.exists(_ffmpeg_link):
+    # A dangling symlink reports exists() == False yet still occupies the path,
+    # so remove it before recreating the entry.
+    os.remove(_ffmpeg_link)
+if not os.path.exists(_ffmpeg_link):
+    try:
+        os.symlink(_ffmpeg_exe, _ffmpeg_link)
+    except OSError:
+        # Symlink creation is not always permitted; fall back to an executable copy.
+        shutil.copy2(_ffmpeg_exe, _ffmpeg_link)
+        os.chmod(_ffmpeg_link, 0o755)
+os.environ["PATH"] = _ffmpeg_bin_dir + os.pathsep + os.environ.get("PATH", "")
+
+# Certificate verification stays on by default: turning it off affects every
+# HTTPS request in this process, including the model download. Set
+# WHISPER_INSECURE_SSL=1 only as a temporary workaround when the download fails
+# with UNEXPECTED_EOF_WHILE_READING.
+if os.environ.get("WHISPER_INSECURE_SSL") == "1":
+    print("WARNING: WHISPER_INSECURE_SSL=1, HTTPS certificate verification is disabled.")
+    ssl._create_default_https_context = ssl._create_unverified_context
+
+app = FastAPI(title="Whisper Transcription Service")
+
+# Allow CORS for frontend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allows all origins
+    # Credentialed requests should not be combined with a wildcard origin, and
+    # this endpoint reads neither cookies nor auth headers.
+    allow_credentials=False,
+    allow_methods=["*"],  # Allows all methods
+    allow_headers=["*"],  # Allows all headers
+)
+
+# Load the whisper model globally. "small" is a good balance between speed and accuracy.
+print("Loading Whisper model (small)... This may take a moment.")
+model = whisper.load_model("small")
+print("Model loaded successfully.")
+
+# The single shared model instance is used by one request at a time.
+_transcribe_lock = threading.Lock()
+
+
+def _run_transcription(path):
+    """Run the blocking Whisper call on ``path`` while holding the model lock."""
+    with _transcribe_lock:
+        # Forcing language to Chinese for better accuracy on Chinese speech
+        return model.transcribe(path, language="zh", task="transcribe")
+
+
+@app.post("/transcribe")
+async def transcribe_audio(file: UploadFile = File(...)):
+    """Transcribe an uploaded audio file.
+
+    Args:
+        file: Multipart ``file`` field containing the recorded audio.
+
+    Returns:
+        ``{"text": ...}`` on success; a JSON ``{"error": ...}`` body with status
+        400 for an empty upload or 500 when transcription fails.
+    """
+    tmp_path = None
+    try:
+        content = await file.read()
+        if not content:
+            return JSONResponse(status_code=400, content={"error": "Empty audio upload"})
+
+        # The model is given a file path, so spool the upload to a temporary file.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
+            tmp_path = tmp.name
+            tmp.write(content)
+
+        # model.transcribe is a blocking call; run it in a worker thread so the
+        # event loop keeps serving other requests.
+        result = await run_in_threadpool(_run_transcription, tmp_path)
+        return {"text": result.get("text", "")}
+    except Exception as e:
+        print(f"Error during transcription: {e}")
+        print(traceback.format_exc())
+        return JSONResponse(status_code=500, content={"error": str(e)})
+    finally:
+        # Clean up the temporary file
+        if tmp_path and os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8001)
diff --git a/whisper/requirements.txt b/whisper/requirements.txt
new file mode 100644
index 0000000..ddedf8a
--- /dev/null
+++ b/whisper/requirements.txt
@@ -0,0 +1,5 @@
+fastapi
+uvicorn
+python-multipart
+openai-whisper
+imageio-ffmpeg