feat: mv data folder to root
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
data
|
||||
_research
|
||||
.trae
|
||||
.DS_Store
|
||||
|
||||
@@ -44,6 +44,7 @@ DataClaw 的架构主要分为三只“大钳子”:
|
||||
1. **`frontend/`** 🎨: 闪亮的外壳。基于 **React 19**、**Vite**、**TailwindCSS** 和 **Zustand** 构建。拥有类似微信/ChatGPT的对话界面、支持流式思考过程渲染以及交互式图表展示。
|
||||
2. **`backend/`** ⚙️: 强健的肌肉。一个 **FastAPI** 后端服务,负责管理项目、数据源连接、用户会话持久化以及作为 API 网关。
|
||||
3. **`nanobot/`** 🧠: 智慧的大脑。核心的 AI Agent 框架,负责处理意图路由、NL2SQL 转换、Schema 缓存管理以及与 LLM 的底层交互。
|
||||
4. **`data/`** 🗄️: 运行时数据目录。与代码目录解耦,存放上传文件、会话、技能工作区、报告与配置缓存。
|
||||
|
||||
***
|
||||
|
||||
@@ -68,6 +69,14 @@ pip install -r requirements.txt
|
||||
uvicorn app.main:app --reload --port 8000
|
||||
```
|
||||
|
||||
可选环境变量:
|
||||
|
||||
```bash
|
||||
export DATA_ROOT=/absolute/path/to/data
|
||||
```
|
||||
|
||||
若未设置,默认使用仓库根目录下的 `data/`;如果该目录不存在而旧版的 `backend/data/` 存在,则会回退使用旧目录并给出迁移提示。
|
||||
|
||||
*提示:请确保* *`nanobot`* *核心库已根据项目工作区的要求正确链接或以可编辑模式 (editable mode) 安装。*
|
||||
|
||||
### 2. 前端服务启动 ⚛️
|
||||
@@ -150,4 +159,3 @@ DataClaw 的开发深受以下优秀开源项目的启发,特此致谢:
|
||||
- [Aix-DB](https://github.com/apconw/Aix-DB): 在智能数据分析和交互式体验方面提供了极好的参考。
|
||||
|
||||
<br />
|
||||
|
||||
|
||||
+9
-1
@@ -44,6 +44,7 @@ DataClaw is divided into three main claws (components):
|
||||
1. **`frontend/`** 🎨: The shiny shell. Built with **React 19**, **Vite**, **TailwindCSS**, and **Zustand**. It features a chat-like interface, streaming AI responses, and interactive Vega charts.
|
||||
2. **`backend/`** ⚙️: The muscle. A **FastAPI** application managing projects, data source connections, user sessions, and API gateways.
|
||||
3. **`nanobot/`** 🧠: The brain. The core AI agent framework handling NL2SQL, schema caching, prompt injection, and LLM routing.
|
||||
4. **`data/`** 🗄️: Runtime data root. Decoupled from code directories and used for uploads, sessions, workspace skills, reports, and cached configs.
|
||||
|
||||
***
|
||||
|
||||
@@ -68,6 +69,14 @@ pip install -r requirements.txt
|
||||
uvicorn app.main:app --reload --port 8000
|
||||
```
|
||||
|
||||
Optional environment variable:
|
||||
|
||||
```bash
|
||||
export DATA_ROOT=/absolute/path/to/data
|
||||
```
|
||||
|
||||
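If not set, DataClaw uses the `data/` directory at the repository root by default. If that directory does not exist but a legacy `backend/data/` directory does, the legacy directory is used and a migration notice is emitted.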
|
||||
|
||||
*Note: Make sure the `nanobot` core library is correctly linked, or installed in editable mode, as the project workspace requires.*
|
||||
|
||||
### 2. Frontend Setup ⚛️
|
||||
@@ -149,4 +158,3 @@ The development of DataClaw was deeply inspired by the following excellent open-
|
||||
- [Aix-DB](https://github.com/apconw/Aix-DB): Provided an excellent reference for intelligent data analysis and interactive user experience.
|
||||
|
||||
<br />
|
||||
|
||||
|
||||
@@ -6,12 +6,13 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from jose import jwt, JWTError
|
||||
from pydantic import BaseModel, Field
|
||||
from app.core.security import SECRET_KEY, ALGORITHM
|
||||
from app.core.data_root import get_data_root
|
||||
from litellm import completion
|
||||
|
||||
router = APIRouter()
|
||||
security = HTTPBearer()
|
||||
|
||||
DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "llm_config.json")
|
||||
DATA_FILE = str(get_data_root() / "llm_config.json")
|
||||
|
||||
|
||||
class CurrentUser(BaseModel):
|
||||
|
||||
@@ -10,14 +10,15 @@ from datetime import datetime
|
||||
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.core.data_root import get_data_root, get_workspace_root
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
||||
DATA_FILE = os.path.join(BASE_DIR, "data", "skills.json")
|
||||
SKILL_HUB_DIR = os.path.join(BASE_DIR, "data", "workspace", "skills")
|
||||
DATA_FILE = str(get_data_root() / "skills.json")
|
||||
SKILL_HUB_DIR = str(get_workspace_root() / "skills")
|
||||
|
||||
# Ensure skill-hub directory exists
|
||||
os.makedirs(SKILL_HUB_DIR, exist_ok=True)
|
||||
def _ensure_skill_hub_dir() -> None:
    """Create the skill hub directory (``SKILL_HUB_DIR``) when it is missing.

    Missing parent directories are created as well; an already existing
    directory is left untouched.
    """
    hub_dir = SKILL_HUB_DIR
    os.makedirs(hub_dir, exist_ok=True)
|
||||
|
||||
class Skill(BaseModel):
|
||||
id: str = Field(..., description="Unique identifier for the skill")
|
||||
@@ -134,6 +135,7 @@ def _write_skill_markdown(skill_dir: str, skill_name: str, description: Optional
|
||||
return skill_md_path
|
||||
|
||||
def load_skills(project_id: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||
_ensure_skill_hub_dir()
|
||||
data = _load_data()
|
||||
|
||||
registered_paths = set()
|
||||
@@ -208,6 +210,7 @@ async def upload_skill(
|
||||
"""Upload a skill file (SKILL.md) or a packaged skill (zip/tar.gz)."""
|
||||
filename = file.filename
|
||||
print(f"Uploading skill: {filename}, project_id: {project_id}")
|
||||
_ensure_skill_hub_dir()
|
||||
|
||||
# Create a unique temp directory
|
||||
temp_dir_name = f"temp_{datetime.now().timestamp()}_{os.urandom(4).hex()}"
|
||||
@@ -323,6 +326,7 @@ async def upload_skill(
|
||||
|
||||
@router.post("/skills", response_model=Skill)
|
||||
def create_skill(skill: SkillCreate):
|
||||
_ensure_skill_hub_dir()
|
||||
data = load_skills()
|
||||
if any(item["id"] == skill.id for item in data):
|
||||
raise HTTPException(status_code=400, detail="Skill with this ID already exists")
|
||||
|
||||
@@ -3,14 +3,15 @@ import pandas as pd
|
||||
import duckdb
|
||||
import io
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.data_root import get_uploads_root
|
||||
|
||||
router = APIRouter()
|
||||
upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads"
|
||||
upload_dir.mkdir(parents=True, exist_ok=True)
|
||||
upload_dir = get_uploads_root()
|
||||
|
||||
@router.post("/upload/file")
|
||||
async def upload_file(file: UploadFile = File(...)):
|
||||
upload_dir.mkdir(parents=True, exist_ok=True)
|
||||
allowed_extensions = ('.csv', '.xls', '.xlsx', '.parquet', '.db', '.sqlite', '.sqlite3')
|
||||
filename_lower = file.filename.lower()
|
||||
if not filename_lower.endswith(allowed_extensions):
|
||||
|
||||
@@ -6,6 +6,7 @@ from urllib.parse import quote
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.core.data_root import get_data_root, get_reports_root, get_uploads_root, get_workspace_root
|
||||
|
||||
LOCAL_URI_PATTERN = re.compile(r"local://[^\s<>'\"\]\)\}]+")
|
||||
PATH_PATTERN = re.compile(
|
||||
@@ -138,11 +139,11 @@ def _normalize_locator(raw_locator: str) -> str:
|
||||
|
||||
|
||||
def _resolve_locator(locator: str) -> Path | None:
|
||||
backend_root = Path(__file__).resolve().parents[2]
|
||||
data_root = backend_root / "data"
|
||||
workspace_root = data_root / "workspace"
|
||||
uploads_root = data_root / "uploads"
|
||||
reports_root = data_root / "data"
|
||||
data_root = get_data_root()
|
||||
workspace_root = get_workspace_root()
|
||||
uploads_root = get_uploads_root()
|
||||
reports_root = get_reports_root()
|
||||
repo_root = data_root.parent
|
||||
if locator.startswith("local://"):
|
||||
raw_local = locator.replace("local://", "", 1).strip().lstrip("/\\")
|
||||
if not raw_local:
|
||||
@@ -160,11 +161,11 @@ def _resolve_locator(locator: str) -> Path | None:
|
||||
if path.is_absolute():
|
||||
return path
|
||||
if normalized.startswith("data/data/"):
|
||||
return backend_root / normalized
|
||||
return repo_root / normalized
|
||||
checks = [
|
||||
workspace_root / normalized,
|
||||
data_root / normalized,
|
||||
backend_root / normalized,
|
||||
repo_root / normalized,
|
||||
]
|
||||
for candidate in checks:
|
||||
if candidate.exists():
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory anchors derived from this module's resolved location on disk.
# parents[2] is the grandparent of the directory that contains this file.
BACKEND_ROOT = Path(__file__).resolve().parents[2]
# The repository root is the directory that contains BACKEND_ROOT.
REPO_ROOT = BACKEND_ROOT.parent
# Preferred runtime data location: <REPO_ROOT>/data.
DEFAULT_DATA_ROOT = REPO_ROOT.joinpath("data")
# Former runtime data location: <BACKEND_ROOT>/data (used as a fallback by get_data_root).
LEGACY_DATA_ROOT = BACKEND_ROOT.joinpath("data")
|
||||
|
||||
|
||||
def get_data_root() -> Path:
    """Return the directory that holds the application's runtime data.

    Resolution order:

    1. The ``DATA_ROOT`` environment variable, when set to a non-blank value.
       Surrounding whitespace is stripped, ``~`` is expanded and the path is
       made absolute.
    2. ``DEFAULT_DATA_ROOT`` when it already exists on disk.
    3. ``LEGACY_DATA_ROOT`` when it exists; a migration warning is issued.
    4. ``DEFAULT_DATA_ROOT`` otherwise, even though it does not exist yet.

    This function never creates the directory it returns.

    Returns:
        The selected data root as a ``Path``.
    """
    import warnings

    configured = (os.getenv("DATA_ROOT") or "").strip()
    if configured:
        return Path(configured).expanduser().resolve()
    if DEFAULT_DATA_ROOT.exists():
        return DEFAULT_DATA_ROOT
    if LEGACY_DATA_ROOT.exists():
        # This function is called repeatedly (per request and several times
        # during start-up). Going through the warnings machinery lets Python
        # de-duplicate the identical notice instead of printing it on every
        # call, and lets operators filter or escalate it.
        warnings.warn(
            f"[DATA_ROOT] legacy path detected: {LEGACY_DATA_ROOT}. Please migrate to {DEFAULT_DATA_ROOT}.",
            FutureWarning,
        )
        return LEGACY_DATA_ROOT
    return DEFAULT_DATA_ROOT
|
||||
|
||||
|
||||
def get_workspace_root() -> Path:
    """Return the ``workspace`` directory path under the current data root.

    The directory is not created here.
    """
    return get_data_root().joinpath("workspace")
|
||||
|
||||
|
||||
def get_uploads_root() -> Path:
    """Return the ``uploads`` directory path under the current data root.

    The directory is not created here.
    """
    return get_data_root().joinpath("uploads")
|
||||
|
||||
|
||||
def get_reports_root() -> Path:
    """Return the reports directory path, i.e. ``data`` under the data root.

    The directory is not created here.
    """
    return get_data_root().joinpath("data")
|
||||
|
||||
|
||||
def ensure_data_layout() -> None:
    """Create the data root and its standard sub-directories if missing.

    The data root, workspace, uploads and reports directories are created
    in that order, including any missing parents. Directories that already
    exist are left untouched.
    """
    locators = (
        get_data_root,
        get_workspace_root,
        get_uploads_root,
        get_reports_root,
    )
    for locate in locators:
        locate().mkdir(parents=True, exist_ok=True)
|
||||
@@ -2,12 +2,13 @@ import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from app.core.data_root import get_data_root, get_reports_root, get_uploads_root, get_workspace_root
|
||||
|
||||
backend_root = Path(__file__).resolve().parents[2]
|
||||
data_root = backend_root / "data"
|
||||
workspace_root = data_root / "workspace"
|
||||
uploads_root = data_root / "uploads"
|
||||
reports_root = data_root / "data"
|
||||
|
||||
data_root = get_data_root()
|
||||
workspace_root = get_workspace_root()
|
||||
uploads_root = get_uploads_root()
|
||||
reports_root = get_reports_root()
|
||||
allowed_artifact_roots = (workspace_root, uploads_root, reports_root)
|
||||
|
||||
|
||||
@@ -50,7 +51,7 @@ def resolve_artifact_target(target: str) -> Path | None:
|
||||
if path.is_absolute():
|
||||
return path
|
||||
if normalized.startswith("data/data/"):
|
||||
return backend_root / normalized
|
||||
return data_root.parent / normalized
|
||||
checks = (
|
||||
workspace_root / normalized,
|
||||
data_root / normalized,
|
||||
|
||||
@@ -34,6 +34,7 @@ from nanobot.config.schema import Config
|
||||
from app.api.skills import load_skills
|
||||
from app.services.llm_cache import get_llm_configs
|
||||
|
||||
from app.core.data_root import get_workspace_root
|
||||
from app.core.streaming_provider import StreamingLiteLLMProvider
|
||||
|
||||
class NanobotIntegration:
|
||||
@@ -47,8 +48,7 @@ class NanobotIntegration:
|
||||
self._model_agent_lock = asyncio.Lock()
|
||||
|
||||
def initialize(self):
|
||||
# Set workspace path to backend/data/workspace
|
||||
workspace_path = Path(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "workspace"))
|
||||
workspace_path = get_workspace_root()
|
||||
workspace_path.mkdir(parents=True, exist_ok=True)
|
||||
self._sync_builtin_skills_to_workspace(workspace_path)
|
||||
|
||||
|
||||
@@ -5,14 +5,21 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from app.core.data_root import get_data_root
|
||||
|
||||
|
||||
class SessionAliasStore:
|
||||
def __init__(self) -> None:
|
||||
backend_root = Path(__file__).resolve().parents[2]
|
||||
data_dir = backend_root / "data"
|
||||
data_dir.mkdir(parents=True, exist_ok=True)
|
||||
data_dir = get_data_root()
|
||||
try:
|
||||
data_dir.mkdir(parents=True, exist_ok=True)
|
||||
except PermissionError as exc:
|
||||
raise RuntimeError(f"DATA_ROOT 权限不足: {data_dir}") from exc
|
||||
self.db_path = data_dir / "nanobot_sessions.db"
|
||||
self._init_db()
|
||||
try:
|
||||
self._init_db()
|
||||
except PermissionError as exc:
|
||||
raise RuntimeError(f"DATA_ROOT 权限不足: {data_dir}") from exc
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(str(self.db_path))
|
||||
|
||||
@@ -6,9 +6,9 @@ from app.models.datasource import DataSource
|
||||
from app.schemas.mdl import MDLManifest, Model, Column, TableReference
|
||||
from app.connectors.factory import get_connector
|
||||
from app.database import SessionLocal
|
||||
from app.core.data_root import get_data_root
|
||||
|
||||
# Assuming running from backend/ directory
|
||||
MDL_STORAGE_PATH = Path("data/mdl")
|
||||
MDL_STORAGE_PATH = get_data_root() / "mdl"
|
||||
|
||||
class MDLService:
|
||||
@staticmethod
|
||||
|
||||
+14
-3
@@ -20,6 +20,7 @@ from app.api import upload, llm, skills, users, datasources, projects, semantic
|
||||
from app.connectors.postgres import postgres_connector
|
||||
from app.connectors.clickhouse import clickhouse_connector
|
||||
from app.core.artifacts import extract_artifacts
|
||||
from app.core.data_root import ensure_data_layout, get_data_root, get_reports_root
|
||||
from app.core.files import ensure_artifact_access, resolve_artifact_target
|
||||
from app.core.nanobot import nanobot_service
|
||||
from app.core.session_alias_store import session_alias_store
|
||||
@@ -44,9 +45,12 @@ app.add_middleware(
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
# Mount static directory for reports
|
||||
data_dir = os.path.join(os.path.dirname(__file__), "data", "data")
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
app.mount("/reports", StaticFiles(directory=data_dir), name="reports")
|
||||
try:
|
||||
ensure_data_layout()
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"DATA_ROOT 初始化失败: {e}") from e
|
||||
reports_dir = get_reports_root()
|
||||
app.mount("/reports", StaticFiles(directory=str(reports_dir)), name="reports")
|
||||
|
||||
app.include_router(upload.router, prefix="/api/v1")
|
||||
app.include_router(llm.router, prefix="/api/v1")
|
||||
@@ -71,6 +75,13 @@ PREVIEWABLE_TEXT_EXTENSIONS = {
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
try:
|
||||
data_root = get_data_root()
|
||||
data_root.mkdir(parents=True, exist_ok=True)
|
||||
if not os.access(data_root, os.R_OK | os.W_OK | os.X_OK):
|
||||
raise RuntimeError(f"DATA_ROOT 权限不足: {data_root}")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"DATA_ROOT 初始化失败: {e}") from e
|
||||
# Initialize nanobot in background
|
||||
try:
|
||||
await nanobot_service.start()
|
||||
|
||||
@@ -3,11 +3,12 @@ from pathlib import Path
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.core.data_root import get_data_root
|
||||
from main import app
|
||||
|
||||
|
||||
def _backend_data_root() -> Path:
|
||||
return Path(__file__).resolve().parents[1] / "data"
|
||||
return get_data_root()
|
||||
|
||||
|
||||
def test_download_artifact_within_whitelist() -> None:
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.artifacts import extract_artifacts
|
||||
from app.core.data_root import get_data_root
|
||||
|
||||
|
||||
def _backend_data_root() -> Path:
|
||||
return Path(__file__).resolve().parents[1] / "data"
|
||||
return get_data_root()
|
||||
|
||||
|
||||
def test_extract_artifacts_from_local_and_tool_paths() -> None:
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
from pathlib import Path
|
||||
|
||||
from app.core import data_root
|
||||
|
||||
|
||||
def test_data_root_prefers_env(monkeypatch, tmp_path: Path) -> None:
    """A ``DATA_ROOT`` environment variable takes priority over the defaults."""
    env_dir = tmp_path / "custom-data-root"
    monkeypatch.setenv("DATA_ROOT", str(env_dir))
    resolved = data_root.get_data_root()
    # The function returns the configured path in resolved (absolute) form.
    assert resolved == env_dir.resolve()
|
||||
|
||||
|
||||
def test_data_root_falls_back_to_legacy(monkeypatch, tmp_path: Path) -> None:
    """Without ``DATA_ROOT``, an existing legacy directory is chosen when the default is absent."""
    monkeypatch.delenv("DATA_ROOT", raising=False)
    legacy_dir = tmp_path / "legacy-data"
    missing_default = tmp_path / "default-data"
    # Only the legacy directory exists on disk; the default is never created.
    legacy_dir.mkdir(parents=True, exist_ok=True)
    overrides = (
        ("LEGACY_DATA_ROOT", legacy_dir),
        ("DEFAULT_DATA_ROOT", missing_default),
    )
    for attr_name, replacement in overrides:
        monkeypatch.setattr(data_root, attr_name, replacement)
    assert data_root.get_data_root() == legacy_dir
|
||||
|
||||
|
||||
def test_ensure_data_layout_creates_children(monkeypatch, tmp_path: Path) -> None:
    """``ensure_data_layout`` creates the standard sub-directories under the data root."""
    monkeypatch.setenv("DATA_ROOT", str(tmp_path / "dr"))
    data_root.ensure_data_layout()
    base = data_root.get_data_root()
    for child in ("workspace", "uploads", "data"):
        assert (base / child).exists()
|
||||
@@ -1114,8 +1114,20 @@ export function ChatInterface() {
|
||||
<iframe
|
||||
title={`report-${msg.id}`}
|
||||
srcDoc={reportHtml}
|
||||
sandbox="allow-same-origin"
|
||||
sandbox="allow-same-origin allow-scripts"
|
||||
className="w-full h-[620px] bg-white"
|
||||
onLoad={(e) => {
|
||||
try {
|
||||
const doc = (e.target as HTMLIFrameElement).contentDocument;
|
||||
if (doc) {
|
||||
const style = doc.createElement('style');
|
||||
style.textContent = `html, body { overflow: auto !important; }`;
|
||||
doc.head.appendChild(style);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("Failed to inject styles", err);
|
||||
}
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
) : null}
|
||||
@@ -1371,7 +1383,19 @@ export function ChatInterface() {
|
||||
<iframe
|
||||
title={artifactPreview.name}
|
||||
src={artifactPreview.previewUrl}
|
||||
className="w-full h-full"
|
||||
className="w-full h-full border-0"
|
||||
onLoad={(e) => {
|
||||
try {
|
||||
const doc = (e.target as HTMLIFrameElement).contentDocument;
|
||||
if (doc) {
|
||||
const style = doc.createElement('style');
|
||||
style.textContent = `html, body { overflow: auto !important; }`;
|
||||
doc.head.appendChild(style);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("Failed to inject styles into iframe", err);
|
||||
}
|
||||
}}
|
||||
/>
|
||||
) : null}
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user