Files
DataClaw/backend/app/api/upload.py
T

73 lines
2.6 KiB
Python
Raw Normal View History

2026-03-15 00:10:01 +08:00
from fastapi import APIRouter, UploadFile, File, HTTPException
2026-03-14 15:44:48 +08:00
import pandas as pd
import duckdb
import io
import uuid
2026-03-15 00:10:01 +08:00
from pathlib import Path
2026-03-14 15:44:48 +08:00
# Router on which this module's upload endpoint is registered.
router = APIRouter()
2026-03-15 00:10:01 +08:00
# Directory where uploaded files are persisted: "data/uploads" under the
# third ancestor directory of this file's resolved path
# (presumably the backend root -- confirm against the repository layout).
upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads"
# Created at import time, including missing parents; an existing directory is not an error.
upload_dir.mkdir(parents=True, exist_ok=True)
2026-03-14 15:44:48 +08:00
2026-03-15 00:10:01 +08:00
@router.post("/upload/file")
async def upload_file(file: UploadFile = File(...)):
    """Persist an uploaded data file and return a short description of it.

    The file is written to ``upload_dir`` under a UUID-prefixed name. CSV,
    Excel and Parquet uploads are additionally loaded into a DataFrame and
    described through an in-memory DuckDB connection; SQLite uploads are
    stored without being inspected.

    Args:
        file: The uploaded file. Its name must end with one of the allowed
            extensions (case-insensitive).

    Returns:
        A dict with ``filename`` and ``url`` and either ``rows``, ``columns``
        and ``summary`` on success, or ``analysis_error`` when the file was
        stored but could not be parsed.

    Raises:
        HTTPException: 400 for a missing/unsupported file name or an empty
            file; 500 for any other failure while reading or storing it.
    """
    allowed_extensions = ('.csv', '.xls', '.xlsx', '.parquet', '.db', '.sqlite', '.sqlite3')

    # The file name is client-controlled: keep only its final path component
    # (treating backslashes as separators too) so it can never point outside
    # upload_dir, and tolerate a missing name instead of crashing on None.
    original_name = Path((file.filename or "").replace("\\", "/")).name
    filename_lower = original_name.lower()
    if not filename_lower.endswith(allowed_extensions):
        raise HTTPException(status_code=400, detail="Invalid file type. Allowed: CSV, Excel, Parquet, SQLite.")

    try:
        content = await file.read()
        if not content:
            raise HTTPException(status_code=400, detail="Empty file is not allowed.")

        unique_filename = f"{uuid.uuid4()}-{original_name}"
        save_path = upload_dir / unique_filename
        save_path.write_bytes(content)
        file_url = f"local://{unique_filename}"

        if filename_lower.endswith(('.db', '.sqlite', '.sqlite3')):
            # SQLite databases are stored only; no DataFrame analysis is done here.
            return {
                "filename": unique_filename,
                "url": file_url,
                "rows": 0,
                "columns": [],
                "summary": "SQLite database uploaded"
            }

        # Analysis is best-effort: the file is already saved, so a parse
        # failure is reported in the response body rather than as an HTTP error.
        try:
            file_obj = io.BytesIO(content)
            if filename_lower.endswith('.csv'):
                df = pd.read_csv(file_obj)
            elif filename_lower.endswith(('.xls', '.xlsx')):
                df = pd.read_excel(file_obj)
            else:
                # Only '.parquet' remains after the extension allow-list check.
                df = pd.read_parquet(file_obj)

            duckdb_conn = duckdb.connect(database=':memory:')
            try:
                duckdb_conn.register('uploaded_file', df)
                summary = duckdb_conn.execute("DESCRIBE uploaded_file").fetchall()
            finally:
                # Release the in-memory database even if DESCRIBE fails.
                duckdb_conn.close()

            return {
                "filename": unique_filename,
                "url": file_url,
                "rows": len(df),
                "columns": list(df.columns),
                "summary": str(summary)
            }
        except Exception as e:
            return {
                "filename": unique_filename,
                "url": file_url,
                "analysis_error": str(e)
            }
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400 for an empty file) through
        # unchanged instead of rewrapping them as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e