diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..88d8c5c Binary files /dev/null and b/.DS_Store differ diff --git a/backend/app/agent/nl2sql.py b/backend/app/agent/nl2sql.py index 47b9446..e90750e 100644 --- a/backend/app/agent/nl2sql.py +++ b/backend/app/agent/nl2sql.py @@ -23,6 +23,7 @@ from app.schemas.chart import ChartGenerationResponse from app.agent.chart import generate_chart from app.database import SessionLocal from app.models.datasource import DataSource +from app.core.files import resolve_upload_file_path SCHEMA_CACHE_TTL_SECONDS = 300 CONNECTION_CACHE_TTL_SECONDS = 30 @@ -100,15 +101,10 @@ The final answer must be a ANSI SQL query in JSON format: """ def _resolve_upload_file_path(file_url: Optional[str]) -> Path: - if not file_url or not file_url.startswith("local://"): - raise ValueError("Invalid uploaded file URL") - raw_name = file_url.replace("local://", "", 1) - safe_name = os.path.basename(raw_name) - upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads" - file_path = upload_dir / safe_name - if not file_path.exists(): - raise ValueError(f"Uploaded file not found: {safe_name}") - return file_path + try: + return resolve_upload_file_path(file_url) + except ValueError as e: + raise ValueError(f"Invalid uploaded file URL: {e}") def _load_upload_dataframe_from_path(file_path: Path) -> pd.DataFrame: suffix = file_path.suffix.lower() diff --git a/backend/app/api/upload.py b/backend/app/api/upload.py index 0b53cb5..1f4a1ed 100644 --- a/backend/app/api/upload.py +++ b/backend/app/api/upload.py @@ -11,9 +11,10 @@ upload_dir.mkdir(parents=True, exist_ok=True) @router.post("/upload/file") async def upload_file(file: UploadFile = File(...)): - allowed_extensions = ('.csv', '.xls', '.xlsx') - if not file.filename.lower().endswith(allowed_extensions): - raise HTTPException(status_code=400, detail="Invalid file type. Only CSV and Excel files allowed.") + allowed_extensions = ('.csv', '.xls', '.xlsx', '.parquet', '.db', '.sqlite', '.sqlite3') + filename_lower = file.filename.lower() + if not filename_lower.endswith(allowed_extensions): + raise HTTPException(status_code=400, detail="Invalid file type. Allowed: CSV, Excel, Parquet, SQLite.") try: content = await file.read() @@ -29,11 +30,24 @@ async def upload_file(file: UploadFile = File(...)): file_obj.seek(0) try: - if file.filename.lower().endswith('.csv'): + if filename_lower.endswith('.csv'): df = pd.read_csv(file_obj) - else: + elif filename_lower.endswith(('.xls', '.xlsx')): df = pd.read_excel(file_obj) - + elif filename_lower.endswith('.parquet'): + df = pd.read_parquet(file_obj) + elif filename_lower.endswith(('.db', '.sqlite', '.sqlite3')): + # For SQLite, we don't load into DF immediately for analysis here + # Just return success + return { + "filename": unique_filename, + "url": file_url, + "rows": 0, + "columns": [], + "summary": "SQLite database uploaded" + } + + # For DF supported types duckdb_conn = duckdb.connect(database=':memory:') duckdb_conn.register('uploaded_file', df) summary = duckdb_conn.execute("DESCRIBE uploaded_file").fetchall() diff --git a/backend/app/connectors/factory.py b/backend/app/connectors/factory.py index e96c08d..1023a6c 100644 --- a/backend/app/connectors/factory.py +++ b/backend/app/connectors/factory.py @@ -5,6 +5,7 @@ from app.connectors.postgres import PostgresConnector from app.connectors.clickhouse import ClickHouseConnector from app.connectors.parquet import ParquetConnector from app.models.datasource import DataSource +from app.core.files import resolve_upload_file_path @functools.lru_cache(maxsize=32) def _get_cached_connector(ds_type: str, config_json: str): @@ -20,7 +21,8 @@ def _get_cached_connector(ds_type: str, config_json: str): # SQLite uses connection string usually file path db_url = config.get("connection_string") if not db_url and config.get("file_path"): - db_url = f"sqlite:///{config.get('file_path')}" + file_path = str(resolve_upload_file_path(config.get("file_path"))) + db_url = f"sqlite:///{file_path}" return PostgresConnector(db_url=db_url) elif ds_type == "clickhouse": @@ -33,7 +35,8 @@ def _get_cached_connector(ds_type: str, config_json: str): ) elif ds_type == "parquet": - return ParquetConnector(file_path=config.get("file_path")) + file_path = str(resolve_upload_file_path(config.get("file_path"))) + return ParquetConnector(file_path=file_path) else: raise ValueError(f"Unsupported data source type: {ds_type}") diff --git a/backend/app/core/files.py b/backend/app/core/files.py new file mode 100644 index 0000000..9cffdaa --- /dev/null +++ b/backend/app/core/files.py @@ -0,0 +1,18 @@ +import os +from pathlib import Path +from typing import Optional + +def resolve_upload_file_path(file_url: Optional[str]) -> Path: + if not file_url: + raise ValueError("File URL is empty") + + if file_url.startswith("local://"): + raw_name = file_url.replace("local://", "", 1) + safe_name = os.path.basename(raw_name) + # Assuming we are in backend/app/core, go up to backend/data/uploads + upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads" + file_path = upload_dir / safe_name + return file_path + + # If it's already an absolute path (or relative path not starting with local://) + return Path(file_url) diff --git a/backend/data_sources.db b/backend/data_sources.db new file mode 100644 index 0000000..e69de29 diff --git a/backend/dataclaw.db b/backend/dataclaw.db index eb104a2..2af5977 100644 Binary files a/backend/dataclaw.db and b/backend/dataclaw.db differ diff --git a/examples/Car_Database.db b/examples/Car_Database.db new file mode 100644 index 0000000..e7334bc Binary files /dev/null and b/examples/Car_Database.db differ diff --git a/examples/file_example_XLS_1000.xls b/examples/file_example_XLS_1000.xls new file mode 100644 index 0000000..af70880 Binary files /dev/null and b/examples/file_example_XLS_1000.xls differ diff --git a/frontend/src/components/DataSourceForm.tsx b/frontend/src/components/DataSourceForm.tsx index 77af161..ea121a9 100644 --- a/frontend/src/components/DataSourceForm.tsx +++ b/frontend/src/components/DataSourceForm.tsx @@ -1,7 +1,8 @@ -import { useState } from "react"; +import { useState, useRef } from "react"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; -import { Loader2, Check, AlertTriangle } from "lucide-react"; +import { Loader2, Check, AlertTriangle, Upload } from "lucide-react"; +import { api } from "@/lib/api"; export interface DataSourceConfig { id?: number; @@ -24,11 +25,43 @@ export function DataSourceForm({ initialData, onSubmit, onTest, onCancel }: Data const [isTesting, setIsTesting] = useState(false); const [testResult, setTestResult] = useState<{ success: boolean; message: string } | null>(null); const [isSaving, setIsSaving] = useState(false); + const [isUploading, setIsUploading] = useState(false); + const fileInputRef = useRef(null); const handleConfigChange = (key: string, value: any) => { setConfig(prev => ({ ...prev, [key]: value })); }; + const handleFileSelect = () => { + fileInputRef.current?.click(); + }; + + const handleFileUpload = async (e: React.ChangeEvent) => { + const file = e.target.files?.[0]; + if (!file) return; + + setIsUploading(true); + const formData = new FormData(); + formData.append("file", file); + + try { + // @ts-ignore + const res = await api.post("/api/v1/upload/file", formData); + if (res && (res as any).url) { + handleConfigChange("file_path", (res as any).url); + } + } catch (error) { + console.error("Upload failed", error); + alert("上传失败"); + } finally { + setIsUploading(false); + // Clear input value so same file can be selected again + if (fileInputRef.current) { + fileInputRef.current.value = ""; + } + } + }; + const handleTest = async () => { setIsTesting(true); setTestResult(null); @@ -175,11 +208,24 @@ export function DataSourceForm({ initialData, onSubmit, onTest, onCancel }: Data
- handleConfigChange("file_path", e.target.value)} - placeholder="/path/to/database.db" - /> +
+ handleConfigChange("file_path", e.target.value)} + placeholder="/path/to/database.db" + /> + + +
); @@ -188,11 +234,24 @@ export function DataSourceForm({ initialData, onSubmit, onTest, onCancel }: Data
- handleConfigChange("file_path", e.target.value)} - placeholder="/path/to/data.parquet" - /> +
+ handleConfigChange("file_path", e.target.value)} + placeholder="/path/to/data.parquet" + /> + + +
);