add data source
This commit is contained in:
@@ -23,6 +23,7 @@ from app.schemas.chart import ChartGenerationResponse
|
||||
from app.agent.chart import generate_chart
|
||||
from app.database import SessionLocal
|
||||
from app.models.datasource import DataSource
|
||||
from app.core.files import resolve_upload_file_path
|
||||
|
||||
SCHEMA_CACHE_TTL_SECONDS = 300
|
||||
CONNECTION_CACHE_TTL_SECONDS = 30
|
||||
@@ -100,15 +101,10 @@ The final answer must be a ANSI SQL query in JSON format:
|
||||
"""
|
||||
|
||||
def _resolve_upload_file_path(file_url: Optional[str]) -> Path:
|
||||
if not file_url or not file_url.startswith("local://"):
|
||||
raise ValueError("Invalid uploaded file URL")
|
||||
raw_name = file_url.replace("local://", "", 1)
|
||||
safe_name = os.path.basename(raw_name)
|
||||
upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads"
|
||||
file_path = upload_dir / safe_name
|
||||
if not file_path.exists():
|
||||
raise ValueError(f"Uploaded file not found: {safe_name}")
|
||||
return file_path
|
||||
try:
|
||||
return resolve_upload_file_path(file_url)
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Invalid uploaded file URL: {e}")
|
||||
|
||||
def _load_upload_dataframe_from_path(file_path: Path) -> pd.DataFrame:
|
||||
suffix = file_path.suffix.lower()
|
||||
|
||||
@@ -11,9 +11,10 @@ upload_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@router.post("/upload/file")
|
||||
async def upload_file(file: UploadFile = File(...)):
|
||||
allowed_extensions = ('.csv', '.xls', '.xlsx')
|
||||
if not file.filename.lower().endswith(allowed_extensions):
|
||||
raise HTTPException(status_code=400, detail="Invalid file type. Only CSV and Excel files allowed.")
|
||||
allowed_extensions = ('.csv', '.xls', '.xlsx', '.parquet', '.db', '.sqlite', '.sqlite3')
|
||||
filename_lower = file.filename.lower()
|
||||
if not filename_lower.endswith(allowed_extensions):
|
||||
raise HTTPException(status_code=400, detail="Invalid file type. Allowed: CSV, Excel, Parquet, SQLite.")
|
||||
|
||||
try:
|
||||
content = await file.read()
|
||||
@@ -29,11 +30,24 @@ async def upload_file(file: UploadFile = File(...)):
|
||||
file_obj.seek(0)
|
||||
|
||||
try:
|
||||
if file.filename.lower().endswith('.csv'):
|
||||
if filename_lower.endswith('.csv'):
|
||||
df = pd.read_csv(file_obj)
|
||||
else:
|
||||
elif filename_lower.endswith(('.xls', '.xlsx')):
|
||||
df = pd.read_excel(file_obj)
|
||||
|
||||
elif filename_lower.endswith('.parquet'):
|
||||
df = pd.read_parquet(file_obj)
|
||||
elif filename_lower.endswith(('.db', '.sqlite', '.sqlite3')):
|
||||
# For SQLite, we don't load into DF immediately for analysis here
|
||||
# Just return success
|
||||
return {
|
||||
"filename": unique_filename,
|
||||
"url": file_url,
|
||||
"rows": 0,
|
||||
"columns": [],
|
||||
"summary": "SQLite database uploaded"
|
||||
}
|
||||
|
||||
# For DF supported types
|
||||
duckdb_conn = duckdb.connect(database=':memory:')
|
||||
duckdb_conn.register('uploaded_file', df)
|
||||
summary = duckdb_conn.execute("DESCRIBE uploaded_file").fetchall()
|
||||
|
||||
@@ -5,6 +5,7 @@ from app.connectors.postgres import PostgresConnector
|
||||
from app.connectors.clickhouse import ClickHouseConnector
|
||||
from app.connectors.parquet import ParquetConnector
|
||||
from app.models.datasource import DataSource
|
||||
from app.core.files import resolve_upload_file_path
|
||||
|
||||
@functools.lru_cache(maxsize=32)
|
||||
def _get_cached_connector(ds_type: str, config_json: str):
|
||||
@@ -20,7 +21,8 @@ def _get_cached_connector(ds_type: str, config_json: str):
|
||||
# SQLite normally uses a connection string; fall back to a file path if none is configured
|
||||
db_url = config.get("connection_string")
|
||||
if not db_url and config.get("file_path"):
|
||||
db_url = f"sqlite:///{config.get('file_path')}"
|
||||
file_path = str(resolve_upload_file_path(config.get("file_path")))
|
||||
db_url = f"sqlite:///{file_path}"
|
||||
return PostgresConnector(db_url=db_url)
|
||||
|
||||
elif ds_type == "clickhouse":
|
||||
@@ -33,7 +35,8 @@ def _get_cached_connector(ds_type: str, config_json: str):
|
||||
)
|
||||
|
||||
elif ds_type == "parquet":
|
||||
return ParquetConnector(file_path=config.get("file_path"))
|
||||
file_path = str(resolve_upload_file_path(config.get("file_path")))
|
||||
return ParquetConnector(file_path=file_path)
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unsupported data source type: {ds_type}")
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
def resolve_upload_file_path(file_url: Optional[str]) -> Path:
    """Translate a stored file reference into a filesystem path.

    A ``local://<name>`` reference is mapped into the ``data/uploads``
    directory. Only the final path component of ``<name>`` is kept, so any
    directory part embedded in the reference is discarded. Any other
    non-empty value is returned unchanged, wrapped in a ``Path``.

    The returned path is not checked for existence.

    Args:
        file_url: A ``local://`` reference or a plain filesystem path.

    Returns:
        The resolved path.

    Raises:
        ValueError: If ``file_url`` is empty or ``None``, or if a
            ``local://`` reference does not name a file (its last component
            is empty, ``.`` or ``..``).
    """
    if not file_url:
        raise ValueError("File URL is empty")

    prefix = "local://"
    if not file_url.startswith(prefix):
        # If it's already an absolute path (or relative path not starting
        # with local://), hand it back as-is.
        # NOTE(review): this branch applies no containment check; confirm
        # that callers never pass untrusted values through it.
        return Path(file_url)

    raw_name = file_url[len(prefix):]
    safe_name = os.path.basename(raw_name)
    # basename() strips directories but passes "", "." and ".." through
    # untouched; joined to the upload directory those would resolve to the
    # directory itself or to its parent rather than to an uploaded file.
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid uploaded file name: {raw_name!r}")

    # Assuming we are in backend/app/core, go up to backend/data/uploads
    upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads"
    return upload_dir / safe_name
|
||||
Reference in New Issue
Block a user