feat: mv data folder to root

This commit is contained in:
qixinbo
2026-03-27 15:59:23 +08:00
parent 37070d7896
commit 5d479bed68
18 changed files with 175 additions and 39 deletions
+2 -1
View File
@@ -6,12 +6,13 @@ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jose import jwt, JWTError
from pydantic import BaseModel, Field
from app.core.security import SECRET_KEY, ALGORITHM
from app.core.data_root import get_data_root
from litellm import completion
router = APIRouter()
security = HTTPBearer()
DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "llm_config.json")
DATA_FILE = str(get_data_root() / "llm_config.json")
class CurrentUser(BaseModel):
+9 -5
View File
@@ -10,14 +10,15 @@ from datetime import datetime
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from pydantic import BaseModel, Field
from app.core.data_root import get_data_root, get_workspace_root
router = APIRouter()
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
DATA_FILE = os.path.join(BASE_DIR, "data", "skills.json")
SKILL_HUB_DIR = os.path.join(BASE_DIR, "data", "workspace", "skills")
DATA_FILE = str(get_data_root() / "skills.json")
SKILL_HUB_DIR = str(get_workspace_root() / "skills")
# Ensure skill-hub directory exists
os.makedirs(SKILL_HUB_DIR, exist_ok=True)
def _ensure_skill_hub_dir() -> None:
    """Create the skill-hub directory (``SKILL_HUB_DIR``) when it is missing.

    Safe to call repeatedly: an already-existing directory is not an error.
    """
    hub_dir = SKILL_HUB_DIR
    os.makedirs(hub_dir, exist_ok=True)
class Skill(BaseModel):
id: str = Field(..., description="Unique identifier for the skill")
@@ -134,6 +135,7 @@ def _write_skill_markdown(skill_dir: str, skill_name: str, description: Optional
return skill_md_path
def load_skills(project_id: Optional[int] = None) -> List[Dict[str, Any]]:
_ensure_skill_hub_dir()
data = _load_data()
registered_paths = set()
@@ -208,6 +210,7 @@ async def upload_skill(
"""Upload a skill file (SKILL.md) or a packaged skill (zip/tar.gz)."""
filename = file.filename
print(f"Uploading skill: {filename}, project_id: {project_id}")
_ensure_skill_hub_dir()
# Create a unique temp directory
temp_dir_name = f"temp_{datetime.now().timestamp()}_{os.urandom(4).hex()}"
@@ -323,6 +326,7 @@ async def upload_skill(
@router.post("/skills", response_model=Skill)
def create_skill(skill: SkillCreate):
_ensure_skill_hub_dir()
data = load_skills()
if any(item["id"] == skill.id for item in data):
raise HTTPException(status_code=400, detail="Skill with this ID already exists")
+4 -3
View File
@@ -3,14 +3,15 @@ import pandas as pd
import duckdb
import io
import uuid
from pathlib import Path
from app.core.data_root import get_uploads_root
router = APIRouter()
upload_dir = Path(__file__).resolve().parents[2] / "data" / "uploads"
upload_dir.mkdir(parents=True, exist_ok=True)
upload_dir = get_uploads_root()
@router.post("/upload/file")
async def upload_file(file: UploadFile = File(...)):
upload_dir.mkdir(parents=True, exist_ok=True)
allowed_extensions = ('.csv', '.xls', '.xlsx', '.parquet', '.db', '.sqlite', '.sqlite3')
filename_lower = file.filename.lower()
if not filename_lower.endswith(allowed_extensions):
+8 -7
View File
@@ -6,6 +6,7 @@ from urllib.parse import quote
from pydantic import BaseModel
from app.core.data_root import get_data_root, get_reports_root, get_uploads_root, get_workspace_root
LOCAL_URI_PATTERN = re.compile(r"local://[^\s<>'\"\]\)\}]+")
PATH_PATTERN = re.compile(
@@ -138,11 +139,11 @@ def _normalize_locator(raw_locator: str) -> str:
def _resolve_locator(locator: str) -> Path | None:
backend_root = Path(__file__).resolve().parents[2]
data_root = backend_root / "data"
workspace_root = data_root / "workspace"
uploads_root = data_root / "uploads"
reports_root = data_root / "data"
data_root = get_data_root()
workspace_root = get_workspace_root()
uploads_root = get_uploads_root()
reports_root = get_reports_root()
repo_root = data_root.parent
if locator.startswith("local://"):
raw_local = locator.replace("local://", "", 1).strip().lstrip("/\\")
if not raw_local:
@@ -160,11 +161,11 @@ def _resolve_locator(locator: str) -> Path | None:
if path.is_absolute():
return path
if normalized.startswith("data/data/"):
return backend_root / normalized
return repo_root / normalized
checks = [
workspace_root / normalized,
data_root / normalized,
backend_root / normalized,
repo_root / normalized,
]
for candidate in checks:
if candidate.exists():
+39
View File
@@ -0,0 +1,39 @@
import os
from pathlib import Path
# Directory two levels above the directory containing this module.
BACKEND_ROOT = Path(__file__).resolve().parents[2]
# Parent directory of BACKEND_ROOT.
REPO_ROOT = BACKEND_ROOT.parent
# Preferred data location: a "data" directory directly under REPO_ROOT.
DEFAULT_DATA_ROOT = REPO_ROOT.joinpath("data")
# Previous data location: a "data" directory directly under BACKEND_ROOT.
LEGACY_DATA_ROOT = BACKEND_ROOT.joinpath("data")
def get_data_root() -> Path:
    """Return the directory under which application data is stored.

    Resolution order:

    1. The ``DATA_ROOT`` environment variable, when set to a non-blank
       value (``~`` is expanded and the path is made absolute).
    2. ``DEFAULT_DATA_ROOT`` when it already exists on disk.
    3. ``LEGACY_DATA_ROOT`` when it exists; a migration warning is emitted.
    4. ``DEFAULT_DATA_ROOT`` otherwise (the directory may not exist yet).

    This function only resolves the path; it does not create anything.

    Returns:
        The resolved data root as a ``Path``.
    """
    import warnings  # local import so the block needs no file-level change

    configured = os.getenv("DATA_ROOT", "").strip()
    if configured:
        return Path(configured).expanduser().resolve()
    if DEFAULT_DATA_ROOT.exists():
        return DEFAULT_DATA_ROOT
    if LEGACY_DATA_ROOT.exists():
        # This resolver is invoked on every path lookup, so an unconditional
        # print() would repeat the same notice many times. A warning goes to
        # stderr, can be filtered, and under Python's default warning filters
        # is shown only the first time for this location.
        warnings.warn(
            f"[DATA_ROOT] legacy path detected: {LEGACY_DATA_ROOT}. Please migrate to {DEFAULT_DATA_ROOT}.",
            UserWarning,
        )
        return LEGACY_DATA_ROOT
    return DEFAULT_DATA_ROOT
def get_workspace_root() -> Path:
    """Return the ``workspace`` directory located under the data root."""
    root = get_data_root()
    return root.joinpath("workspace")
def get_uploads_root() -> Path:
    """Return the ``uploads`` directory located under the data root."""
    root = get_data_root()
    return root.joinpath("uploads")
def get_reports_root() -> Path:
    """Return the reports directory, i.e. the ``data`` folder under the data root."""
    root = get_data_root()
    return root.joinpath("data")
def ensure_data_layout() -> None:
    """Create the data root and its workspace, uploads and reports directories.

    Directories that already exist are left untouched; missing parents are
    created as needed. Errors raised by ``Path.mkdir`` propagate to the caller.
    """
    # Same order as before: the root first, then each child directory.
    directory_getters = (
        get_data_root,
        get_workspace_root,
        get_uploads_root,
        get_reports_root,
    )
    for resolve_directory in directory_getters:
        resolve_directory().mkdir(parents=True, exist_ok=True)
+7 -6
View File
@@ -2,12 +2,13 @@ import os
from pathlib import Path
from typing import Optional
from app.core.data_root import get_data_root, get_reports_root, get_uploads_root, get_workspace_root
backend_root = Path(__file__).resolve().parents[2]
data_root = backend_root / "data"
workspace_root = data_root / "workspace"
uploads_root = data_root / "uploads"
reports_root = data_root / "data"
data_root = get_data_root()
workspace_root = get_workspace_root()
uploads_root = get_uploads_root()
reports_root = get_reports_root()
allowed_artifact_roots = (workspace_root, uploads_root, reports_root)
@@ -50,7 +51,7 @@ def resolve_artifact_target(target: str) -> Path | None:
if path.is_absolute():
return path
if normalized.startswith("data/data/"):
return backend_root / normalized
return data_root.parent / normalized
checks = (
workspace_root / normalized,
data_root / normalized,
+2 -2
View File
@@ -34,6 +34,7 @@ from nanobot.config.schema import Config
from app.api.skills import load_skills
from app.services.llm_cache import get_llm_configs
from app.core.data_root import get_workspace_root
from app.core.streaming_provider import StreamingLiteLLMProvider
class NanobotIntegration:
@@ -47,8 +48,7 @@ class NanobotIntegration:
self._model_agent_lock = asyncio.Lock()
def initialize(self):
# Set workspace path to backend/data/workspace
workspace_path = Path(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "workspace"))
workspace_path = get_workspace_root()
workspace_path.mkdir(parents=True, exist_ok=True)
self._sync_builtin_skills_to_workspace(workspace_path)
+11 -4
View File
@@ -5,14 +5,21 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from app.core.data_root import get_data_root
class SessionAliasStore:
def __init__(self) -> None:
backend_root = Path(__file__).resolve().parents[2]
data_dir = backend_root / "data"
data_dir.mkdir(parents=True, exist_ok=True)
data_dir = get_data_root()
try:
data_dir.mkdir(parents=True, exist_ok=True)
except PermissionError as exc:
raise RuntimeError(f"DATA_ROOT 权限不足: {data_dir}") from exc
self.db_path = data_dir / "nanobot_sessions.db"
self._init_db()
try:
self._init_db()
except PermissionError as exc:
raise RuntimeError(f"DATA_ROOT 权限不足: {data_dir}") from exc
def _connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(str(self.db_path))
+2 -2
View File
@@ -6,9 +6,9 @@ from app.models.datasource import DataSource
from app.schemas.mdl import MDLManifest, Model, Column, TableReference
from app.connectors.factory import get_connector
from app.database import SessionLocal
from app.core.data_root import get_data_root
# Assuming running from backend/ directory
MDL_STORAGE_PATH = Path("data/mdl")
MDL_STORAGE_PATH = get_data_root() / "mdl"
class MDLService:
@staticmethod
+14 -3
View File
@@ -20,6 +20,7 @@ from app.api import upload, llm, skills, users, datasources, projects, semantic
from app.connectors.postgres import postgres_connector
from app.connectors.clickhouse import clickhouse_connector
from app.core.artifacts import extract_artifacts
from app.core.data_root import ensure_data_layout, get_data_root, get_reports_root
from app.core.files import ensure_artifact_access, resolve_artifact_target
from app.core.nanobot import nanobot_service
from app.core.session_alias_store import session_alias_store
@@ -44,9 +45,12 @@ app.add_middleware(
Base.metadata.create_all(bind=engine)
# Mount static directory for reports
data_dir = os.path.join(os.path.dirname(__file__), "data", "data")
os.makedirs(data_dir, exist_ok=True)
app.mount("/reports", StaticFiles(directory=data_dir), name="reports")
try:
ensure_data_layout()
except Exception as e:
raise RuntimeError(f"DATA_ROOT 初始化失败: {e}") from e
reports_dir = get_reports_root()
app.mount("/reports", StaticFiles(directory=str(reports_dir)), name="reports")
app.include_router(upload.router, prefix="/api/v1")
app.include_router(llm.router, prefix="/api/v1")
@@ -71,6 +75,13 @@ PREVIEWABLE_TEXT_EXTENSIONS = {
@app.on_event("startup")
async def startup_event():
try:
data_root = get_data_root()
data_root.mkdir(parents=True, exist_ok=True)
if not os.access(data_root, os.R_OK | os.W_OK | os.X_OK):
raise RuntimeError(f"DATA_ROOT 权限不足: {data_root}")
except Exception as e:
raise RuntimeError(f"DATA_ROOT 初始化失败: {e}") from e
# Initialize nanobot in background
try:
await nanobot_service.start()
+2 -1
View File
@@ -3,11 +3,12 @@ from pathlib import Path
import pytest
from fastapi.testclient import TestClient
from app.core.data_root import get_data_root
from main import app
def _backend_data_root() -> Path:
return Path(__file__).resolve().parents[1] / "data"
return get_data_root()
def test_download_artifact_within_whitelist() -> None:
+2 -1
View File
@@ -1,10 +1,11 @@
from pathlib import Path
from app.core.artifacts import extract_artifacts
from app.core.data_root import get_data_root
def _backend_data_root() -> Path:
return Path(__file__).resolve().parents[1] / "data"
return get_data_root()
def test_extract_artifacts_from_local_and_tool_paths() -> None:
+28
View File
@@ -0,0 +1,28 @@
from pathlib import Path
from app.core import data_root
def test_data_root_prefers_env(monkeypatch, tmp_path: Path) -> None:
    """An explicit ``DATA_ROOT`` environment variable decides the data root."""
    override_dir = tmp_path / "custom-data-root"
    monkeypatch.setenv("DATA_ROOT", str(override_dir))

    resolved = data_root.get_data_root()

    assert resolved == override_dir.resolve()
def test_data_root_falls_back_to_legacy(monkeypatch, tmp_path: Path) -> None:
    """Without ``DATA_ROOT`` and without the default dir, the legacy dir is used."""
    # Only the legacy directory exists on disk; the default one is never created.
    legacy_dir = tmp_path / "legacy-data"
    default_dir = tmp_path / "default-data"
    legacy_dir.mkdir(parents=True, exist_ok=True)

    monkeypatch.delenv("DATA_ROOT", raising=False)
    monkeypatch.setattr(data_root, "DEFAULT_DATA_ROOT", default_dir)
    monkeypatch.setattr(data_root, "LEGACY_DATA_ROOT", legacy_dir)

    resolved = data_root.get_data_root()
    assert resolved == legacy_dir
def test_ensure_data_layout_creates_children(monkeypatch, tmp_path: Path) -> None:
    """``ensure_data_layout`` creates the workspace, uploads and data folders."""
    monkeypatch.setenv("DATA_ROOT", str(tmp_path / "dr"))

    data_root.ensure_data_layout()

    resolved_root = data_root.get_data_root()
    for child_name in ("workspace", "uploads", "data"):
        assert (resolved_root / child_name).exists()