feat: add modelling layer
This commit is contained in:
@@ -33,7 +33,7 @@ class ClickHouseConnector:
|
||||
table = row[0]
|
||||
if table not in schema:
|
||||
schema[table] = []
|
||||
schema[table].append(f"{row[1]} ({row[2]})")
|
||||
schema[table].append({"name": row[1], "type": row[2]})
|
||||
return schema
|
||||
except Exception as e:
|
||||
print(f"Error getting schema: {e}")
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
import duckdb
|
||||
import pandas as pd
|
||||
from typing import List, Dict, Any
|
||||
import os
|
||||
from app.core.files import resolve_upload_file_path
|
||||
|
||||
class CSVConnector:
    """Query a single CSV file through a throwaway in-memory DuckDB connection.

    Each public method opens its own ``:memory:`` connection, exposes the CSV
    file as a view named by ``_get_table_name`` and closes the connection
    before returning.
    """

    def __init__(self, file_path: str):
        """Store *file_path* and fail fast when it does not exist.

        Raises:
            FileNotFoundError: if nothing exists at *file_path*.
        """
        self.file_path = file_path
        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"CSV file not found: {self.file_path}")

    def _get_table_name(self) -> str:
        """Derive the view name from the file name.

        The extension is dropped, every non-alphanumeric character becomes
        ``_`` and a ``t_`` prefix is added when the result starts with a
        digit, e.g. ``2024 sales-report.csv`` -> ``t_2024_sales_report``.
        """
        base = os.path.splitext(os.path.basename(self.file_path))[0]
        safe_name = "".join(c if c.isalnum() else "_" for c in base)
        if safe_name and safe_name[0].isdigit():
            safe_name = f"t_{safe_name}"
        return safe_name

    def _csv_source_sql(self) -> str:
        """Return the ``read_csv_auto('<path>')`` SQL fragment for this file.

        Single quotes in the path are doubled so that a path such as
        ``it's.csv`` stays inside the SQL string literal instead of ending it.
        """
        escaped_path = self.file_path.replace("'", "''")
        return f"read_csv_auto('{escaped_path}')"

    def _register_view(self, conn) -> str:
        """Create the CSV-backed view on *conn* and return its name."""
        table_name = self._get_table_name()
        # The identifier is double-quoted so a file named after a SQL keyword
        # (e.g. order.csv) can still be registered; the name only contains
        # alphanumerics and underscores, so it cannot hold a quote itself.
        conn.execute(
            f'CREATE OR REPLACE VIEW "{table_name}" AS '
            f"SELECT * FROM {self._csv_source_sql()}"
        )
        return table_name

    def execute_query(self, query: str) -> List[Dict[str, Any]]:
        """Run *query* against the CSV view and return the rows as dicts.

        Args:
            query: SQL text; it should reference the data by the table name
                reported by ``get_schema``.

        Returns:
            One dict per result row, keyed by column name.

        Raises:
            Exception: any error raised while registering the view or running
                the query is printed and then re-raised unchanged.
        """
        conn = duckdb.connect(":memory:")
        try:
            self._register_view(conn)
            df = conn.execute(query).df()
            return df.to_dict(orient="records")
        except Exception as e:
            print(f"CSV Query Error: {e}")
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            conn.close()

    def get_schema(self) -> Dict[str, List[Dict[str, str]]]:
        """Describe the columns of the CSV view.

        Returns:
            ``{table_name: [{"name": ..., "type": ...}, ...]}``, or ``{}``
            when the view cannot be created or described (the error is
            printed rather than raised).
        """
        conn = duckdb.connect(":memory:")
        try:
            table_name = self._register_view(conn)
            columns = conn.execute(f'DESCRIBE "{table_name}"').fetchall()
            # col[0] is the column name, col[1] is its type.
            return {table_name: [{"name": col[0], "type": col[1]} for col in columns]}
        except Exception as e:
            print(f"Error getting schema: {e}")
            return {}
        finally:
            conn.close()

    def test_connection(self) -> bool:
        """Return True when a one-row read of the CSV file succeeds.

        Any failure is printed and reported as ``False``.
        """
        conn = None
        try:
            conn = duckdb.connect(":memory:")
            conn.execute(f"SELECT * FROM {self._csv_source_sql()} LIMIT 1")
            return True
        except Exception as e:
            print(f"CSV Connection Error: {e}")
            return False
        finally:
            # Close on the failure path too; previously a failing probe
            # query left the connection open.
            if conn is not None:
                conn.close()
|
||||
@@ -4,6 +4,7 @@ import functools
|
||||
from app.connectors.postgres import PostgresConnector
|
||||
from app.connectors.clickhouse import ClickHouseConnector
|
||||
from app.connectors.parquet import ParquetConnector
|
||||
from app.connectors.csv import CSVConnector
|
||||
from app.models.datasource import DataSource
|
||||
from app.core.files import resolve_upload_file_path
|
||||
|
||||
@@ -37,6 +38,10 @@ def _get_cached_connector(ds_type: str, config_json: str):
|
||||
elif ds_type == "parquet":
|
||||
file_path = str(resolve_upload_file_path(config.get("file_path")))
|
||||
return ParquetConnector(file_path=file_path)
|
||||
|
||||
elif ds_type == "csv":
|
||||
file_path = str(resolve_upload_file_path(config.get("file_path")))
|
||||
return CSVConnector(file_path=file_path)
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unsupported data source type: {ds_type}")
|
||||
|
||||
@@ -31,7 +31,7 @@ class ParquetConnector:
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def get_schema(self) -> Dict[str, List[str]]:
|
||||
def get_schema(self) -> Dict[str, List[Dict[str, str]]]:
|
||||
conn = duckdb.connect(":memory:")
|
||||
table_name = os.path.splitext(os.path.basename(self.file_path))[0]
|
||||
conn.execute(f"CREATE OR REPLACE VIEW {table_name} AS SELECT * FROM read_parquet('{self.file_path}')")
|
||||
@@ -39,7 +39,7 @@ class ParquetConnector:
|
||||
try:
|
||||
# Get columns
|
||||
columns = conn.execute(f"DESCRIBE {table_name}").fetchall()
|
||||
schema = {table_name: [f"{col[0]} ({col[1]})" for col in columns]}
|
||||
schema = {table_name: [{"name": col[0], "type": col[1]} for col in columns]}
|
||||
return schema
|
||||
except Exception as e:
|
||||
print(f"Error getting schema: {e}")
|
||||
|
||||
@@ -22,6 +22,9 @@ class PostgresConnector:
|
||||
return [dict(row._mapping) for row in result]
|
||||
|
||||
def get_schema(self):
|
||||
if self.engine.dialect.name == "sqlite":
|
||||
return self._get_sqlite_schema()
|
||||
|
||||
query = """
|
||||
SELECT table_name, column_name, data_type
|
||||
FROM information_schema.columns
|
||||
@@ -35,12 +38,27 @@ class PostgresConnector:
|
||||
table = row['table_name']
|
||||
if table not in schema:
|
||||
schema[table] = []
|
||||
schema[table].append(f"{row['column_name']} ({row['data_type']})")
|
||||
schema[table].append({"name": row['column_name'], "type": row['data_type']})
|
||||
return schema
|
||||
except Exception as e:
|
||||
print(f"Error getting schema: {e}")
|
||||
return {}
|
||||
|
||||
def _get_sqlite_schema(self):
|
||||
try:
|
||||
from sqlalchemy import inspect
|
||||
inspector = inspect(self.engine)
|
||||
schema = {}
|
||||
for table_name in inspector.get_table_names():
|
||||
columns = []
|
||||
for col in inspector.get_columns(table_name):
|
||||
columns.append({"name": col['name'], "type": str(col['type'])})
|
||||
schema[table_name] = columns
|
||||
return schema
|
||||
except Exception as e:
|
||||
print(f"Error getting SQLite schema: {e}")
|
||||
return {}
|
||||
|
||||
def test_connection(self) -> bool:
|
||||
try:
|
||||
with self.engine.connect() as connection:
|
||||
|
||||
Reference in New Issue
Block a user