First build

2026-03-14 15:44:48 +08:00
parent 630d57a5cb
commit fb9c0906b5
145 changed files with 25148 additions and 0 deletions
@@ -0,0 +1,106 @@
+import sys
+import os
+import json
+from pathlib import Path
+from typing import List, Optional, Dict, Any
+from pydantic import BaseModel, Field
+
+# Add project root to sys.path to allow importing nanobot.
+# PROJECT_ROOT is the directory three levels above the one containing this file.
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+if str(PROJECT_ROOT) not in sys.path:
+    # Appended (not inserted first), so entries already on sys.path take precedence.
+    sys.path.append(str(PROJECT_ROOT))
+
+from nanobot.providers.litellm_provider import LiteLLMProvider
+from app.connectors.postgres import postgres_connector
+from app.connectors.clickhouse import clickhouse_connector
+from app.api.llm import _load_data as load_llm_config
+
+class NL2SQLRequest(BaseModel):
+    """Input for a natural-language-to-SQL request: the question and the target source."""
+    query: str = Field(..., description="User's natural language query")
+    source: str = Field(..., description="Data source to query (postgres, clickhouse)")
+
+class NL2SQLResponse(BaseModel):
+    """Outcome of an NL2SQL request: the generated SQL, its rows and an optional error."""
+    # Generated SQL text; process_nl2sql sets it to "" when failing before generation.
+    sql: str
+    # Result rows, one dict per row.
+    result: List[Dict[str, Any]]
+    # Human-readable failure description; None on success.
+    error: Optional[str] = None
+
+async def process_nl2sql(request: NL2SQLRequest) -> NL2SQLResponse:
+    # 1. Get the connector and schema
+    connector = None
+    if request.source == "postgres":
+        connector = postgres_connector
+    elif request.source == "clickhouse":
+        connector = clickhouse_connector
+    else:
+        return NL2SQLResponse(sql="", result=[], error=f"Unsupported data source: {request.source}")
+
+    if not connector.test_connection():
+         return NL2SQLResponse(sql="", result=[], error=f"Failed to connect to {request.source}")
+
+    schema = connector.get_schema()
+    schema_str = json.dumps(schema, indent=2)
+
+    # 2. Get the active LLM config
+    llm_configs = load_llm_config()
+    active_config = next((c for c in llm_configs if c.get("is_active")), None)
+    
+    if not active_config:
+        return NL2SQLResponse(sql="", result=[], error="No active LLM configuration found")
+
+    # 3. Initialize Provider
+    try:
+        provider = LiteLLMProvider(
+            api_key=active_config.get("api_key"),
+            api_base=active_config.get("api_base"),
+            default_model=active_config.get("model"),
+            extra_headers=active_config.get("extra_headers")
+        )
+    except Exception as e:
+        return NL2SQLResponse(sql="", result=[], error=f"Failed to initialize LLM provider: {e}")
+
+    # 4. Construct Prompt
+    prompt = f"""You are an expert SQL generator. 
+Given the following database schema for a {request.source} database:
+{schema_str}
+
+Write a SQL query to answer the following question:
+"{request.query}"
+
+Return ONLY the SQL query. Do not include any markdown formatting, explanations, or code blocks. Just the raw SQL string.
+"""
+
+    # 5. Call LLM
+    try:
+        # provider.complete returns a string
+        response = await provider.complete(prompt)
+        sql_query = response.strip()
+        # Remove potential markdown code blocks if the LLM ignores instructions
+        if sql_query.startswith("```sql"):
+            sql_query = sql_query[6:]
+        if sql_query.startswith("```"):
+            sql_query = sql_query[3:]
+        if sql_query.endswith("```"):
+            sql_query = sql_query[:-3]
+        sql_query = sql_query.strip()
+    except Exception as e:
+        return NL2SQLResponse(sql="", result=[], error=f"LLM generation failed: {e}")
+
+    # 6. Execute SQL
+    try:
+        results = connector.execute_query(sql_query)
+        # Convert results to list of dicts if not already (Postgres returns list of dicts, ClickHouse returns list of tuples)
+        formatted_results = []
+        if request.source == "postgres":
+             formatted_results = results
+        elif request.source == "clickhouse":
+            # ClickHouse returns list of tuples, we need column names
+            # But execute_query in ClickHouseConnector just returns raw results from client.execute
+            # client.execute(query, with_column_types=True) might be better but let's stick to simple for now
+            # Actually, without column names it's hard to format as dict.
+            # Let's assume we can just return the raw tuples for now or try to fetch column names.
+            # For now, let's just return as list of lists/tuples if it's not a dict
+            formatted_results = [list(row) for row in results]
+
+        return NL2SQLResponse(sql=sql_query, result=formatted_results)
+    except Exception as e:
+        return NL2SQLResponse(sql=sql_query, result=[], error=f"SQL execution failed: {e}")
@@ -0,0 +1,96 @@
+import json
+import os
+from typing import List, Optional, Dict, Any
+from fastapi import APIRouter, HTTPException, Body
+from pydantic import BaseModel, Field
+
+# Router that the LLM-configuration endpoints in this module are registered on.
+router = APIRouter()
+
+# JSON file used as the persistence store: data/llm_config.json under the
+# directory two levels above the one containing this module.
+DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "llm_config.json")
+
+class LLMConfig(BaseModel):
+    """A stored LLM provider configuration as returned by the API endpoints."""
+    id: str = Field(..., description="Unique identifier for the LLM configuration")
+    provider: str = Field(..., description="Provider name (e.g., openai, azure, anthropic)")
+    model: str = Field(..., description="Model name (e.g., gpt-4, claude-3-opus)")
+    api_key: Optional[str] = Field(None, description="API Key for the provider")
+    api_base: Optional[str] = Field(None, description="Base URL for the API")
+    extra_headers: Optional[Dict[str, str]] = Field(None, description="Extra headers for the request")
+    is_active: bool = Field(True, description="Whether this configuration is active")
+
+class LLMConfigCreate(BaseModel):
+    """Request body for creating an LLM configuration; same fields as LLMConfig."""
+    id: str
+    provider: str
+    model: str
+    api_key: Optional[str] = None
+    api_base: Optional[str] = None
+    extra_headers: Optional[Dict[str, str]] = None
+    is_active: bool = True
+
+class LLMConfigUpdate(BaseModel):
+    """Request body for a partial update; every field is optional and there is no id."""
+    provider: Optional[str] = None
+    model: Optional[str] = None
+    api_key: Optional[str] = None
+    api_base: Optional[str] = None
+    extra_headers: Optional[Dict[str, str]] = None
+    is_active: Optional[bool] = None
+
+def _load_data() -> List[Dict[str, Any]]:
+    if not os.path.exists(DATA_FILE):
+        return []
+    try:
+        with open(DATA_FILE, "r") as f:
+            return json.load(f)
+    except json.JSONDecodeError:
+        return []
+
+def _save_data(data: List[Dict[str, Any]]):
+    os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
+    with open(DATA_FILE, "w") as f:
+        json.dump(data, f, indent=2)
+
+@router.get("/llm", response_model=List[LLMConfig])
+def list_llm_configs():
+    data = _load_data()
+    return [LLMConfig(**item) for item in data]
+
+@router.get("/llm/{config_id}", response_model=LLMConfig)
+def get_llm_config(config_id: str):
+    data = _load_data()
+    for item in data:
+        if item["id"] == config_id:
+            return LLMConfig(**item)
+    raise HTTPException(status_code=404, detail="LLM configuration not found")
+
+@router.post("/llm", response_model=LLMConfig)
+def create_llm_config(config: LLMConfigCreate):
+    data = _load_data()
+    if any(item["id"] == config.id for item in data):
+        raise HTTPException(status_code=400, detail="LLM configuration with this ID already exists")
+    
+    new_config = config.dict()
+    data.append(new_config)
+    _save_data(data)
+    return LLMConfig(**new_config)
+
+@router.put("/llm/{config_id}", response_model=LLMConfig)
+def update_llm_config(config_id: str, config: LLMConfigUpdate):
+    data = _load_data()
+    for i, item in enumerate(data):
+        if item["id"] == config_id:
+            updated_item = item.copy()
+            update_data = config.dict(exclude_unset=True)
+            updated_item.update(update_data)
+            data[i] = updated_item
+            _save_data(data)
+            return LLMConfig(**updated_item)
+    raise HTTPException(status_code=404, detail="LLM configuration not found")
+
+@router.delete("/llm/{config_id}")
+def delete_llm_config(config_id: str):
+    data = _load_data()
+    initial_len = len(data)
+    data = [item for item in data if item["id"] != config_id]
+    if len(data) == initial_len:
+        raise HTTPException(status_code=404, detail="LLM configuration not found")
+    _save_data(data)
+    return {"message": "LLM configuration deleted successfully"}
@@ -0,0 +1,93 @@
+import json
+import os
+from typing import List, Optional, Dict, Any
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+
+# Router that the skill endpoints in this module are registered on.
+router = APIRouter()
+
+# JSON file used as the persistence store: data/skills.json under the
+# directory two levels above the one containing this module.
+DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "skills.json")
+
+class Skill(BaseModel):
+    """A stored skill as returned by the API endpoints."""
+    id: str = Field(..., description="Unique identifier for the skill")
+    name: str = Field(..., description="Name of the skill")
+    description: Optional[str] = Field(None, description="Description of what the skill does")
+    content: str = Field(..., description="The content/prompt/logic of the skill")
+    type: str = Field("python", description="Type of the skill (python, sql, api)")
+
+class SkillCreate(BaseModel):
+    """Request body for creating a skill; same fields as Skill."""
+    id: str
+    name: str
+    description: Optional[str] = None
+    content: str
+    type: str = "python"
+
+class SkillUpdate(BaseModel):
+    """Request body for a partial skill update; every field is optional and there is no id."""
+    name: Optional[str] = None
+    description: Optional[str] = None
+    content: Optional[str] = None
+    type: Optional[str] = None
+
+def _load_data() -> List[Dict[str, Any]]:
+    if not os.path.exists(DATA_FILE):
+        return []
+    try:
+        with open(DATA_FILE, "r") as f:
+            return json.load(f)
+    except json.JSONDecodeError:
+        return []
+
+def _save_data(data: List[Dict[str, Any]]):
+    os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
+    with open(DATA_FILE, "w") as f:
+        json.dump(data, f, indent=2)
+
+def load_skills() -> List[Dict[str, Any]]:
+    """Public accessor for the stored skills; returns exactly what _load_data() returns."""
+    return _load_data()
+
+@router.get("/skills", response_model=List[Skill])
+def list_skills():
+    data = load_skills()
+    return [Skill(**item) for item in data]
+
+@router.get("/skills/{skill_id}", response_model=Skill)
+def get_skill(skill_id: str):
+    data = _load_data()
+    for item in data:
+        if item["id"] == skill_id:
+            return Skill(**item)
+    raise HTTPException(status_code=404, detail="Skill not found")
+
+@router.post("/skills", response_model=Skill)
+def create_skill(skill: SkillCreate):
+    data = _load_data()
+    if any(item["id"] == skill.id for item in data):
+        raise HTTPException(status_code=400, detail="Skill with this ID already exists")
+    
+    new_skill = skill.dict()
+    data.append(new_skill)
+    _save_data(data)
+    return Skill(**new_skill)
+
+@router.put("/skills/{skill_id}", response_model=Skill)
+def update_skill(skill_id: str, skill: SkillUpdate):
+    data = _load_data()
+    for i, item in enumerate(data):
+        if item["id"] == skill_id:
+            updated_item = item.copy()
+            update_data = skill.dict(exclude_unset=True)
+            updated_item.update(update_data)
+            data[i] = updated_item
+            _save_data(data)
+            return Skill(**updated_item)
+    raise HTTPException(status_code=404, detail="Skill not found")
+
+@router.delete("/skills/{skill_id}")
+def delete_skill(skill_id: str):
+    data = _load_data()
+    initial_len = len(data)
+    data = [item for item in data if item["id"] != skill_id]
+    if len(data) == initial_len:
+        raise HTTPException(status_code=404, detail="Skill not found")
+    _save_data(data)
+    return {"message": "Skill deleted successfully"}
@@ -0,0 +1,53 @@
+from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks
+from app.connectors.minio import minio_connector
+import pandas as pd
+import duckdb
+import io
+import uuid
+
+# Router that the upload endpoint in this module is registered on.
+router = APIRouter()
+
+@router.post("/upload/csv")
+async def upload_csv(file: UploadFile = File(...), background_tasks: BackgroundTasks = None):
+    if not file.filename.endswith('.csv'):
+        raise HTTPException(status_code=400, detail="Invalid file type. Only CSV allowed.")
+
+    try:
+        content = await file.read()
+        file_size = len(content)
+        file_obj = io.BytesIO(content)
+        
+        # Generate a unique filename
+        unique_filename = f"{uuid.uuid4()}-{file.filename}"
+        
+        # Upload to MinIO
+        minio_url = minio_connector.upload_file(unique_filename, file_obj, file_size, content_type="text/csv")
+        
+        # Reset file pointer for analysis
+        file_obj.seek(0)
+        
+        # Load into DuckDB (in-memory) for quick analysis
+        try:
+            df = pd.read_csv(file_obj)
+            duckdb_conn = duckdb.connect(database=':memory:')
+            duckdb_conn.register('uploaded_csv', df)
+            summary = duckdb_conn.execute("DESCRIBE uploaded_csv").fetchall()
+            row_count = len(df)
+            columns = list(df.columns)
+            
+            return {
+                "filename": unique_filename,
+                "url": minio_url,
+                "rows": row_count,
+                "columns": columns,
+                "summary": str(summary)
+            }
+        except Exception as e:
+             return {
+                "filename": unique_filename,
+                "url": minio_url,
+                "analysis_error": str(e)
+            }
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
@@ -0,0 +1,50 @@
+from clickhouse_driver import Client
+import os
+
+class ClickHouseConnector:
+    def __init__(self, host: str = None, port: int = 9000, user: str = 'default', password: str = '', database: str = 'default'):
+        self.host = host or os.getenv("CLICKHOUSE_HOST", "localhost")
+        self.port = port or int(os.getenv("CLICKHOUSE_PORT", 9000))
+        self.user = user or os.getenv("CLICKHOUSE_USER", "default")
+        self.password = password or os.getenv("CLICKHOUSE_PASSWORD", "")
+        self.database = database or os.getenv("CLICKHOUSE_DB", "default")
+        
+        self.client = Client(
+            host=self.host, 
+            port=self.port, 
+            user=self.user, 
+            password=self.password, 
+            database=self.database
+        )
+
+    def execute_query(self, query: str):
+        try:
+            return self.client.execute(query)
+        except Exception as e:
+            print(f"ClickHouse Query Error: {e}")
+            raise e
+
+    def get_schema(self):
+        query = "SELECT table, name, type FROM system.columns WHERE database = currentDatabase()"
+        try:
+            results = self.client.execute(query)
+            schema = {}
+            for row in results:
+                table = row[0]
+                if table not in schema:
+                    schema[table] = []
+                schema[table].append(f"{row[1]} ({row[2]})")
+            return schema
+        except Exception as e:
+            print(f"Error getting schema: {e}")
+            return {}
+
+    def test_connection(self) -> bool:
+        try:
+            self.client.execute("SELECT 1")
+            return True
+        except Exception as e:
+            print(f"ClickHouse Connection Error: {e}")
+            return False
+
+# Shared module-level instance, constructed with default arguments at import time.
+clickhouse_connector = ClickHouseConnector()
@@ -0,0 +1,51 @@
+from minio import Minio
+from minio.error import S3Error
+import os
+from typing import BinaryIO
+
+class MinioConnector:
+    def __init__(self):
+        self.endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000")
+        self.access_key = os.getenv("MINIO_ACCESS_KEY", "minioadmin")
+        self.secret_key = os.getenv("MINIO_SECRET_KEY", "minioadmin")
+        self.secure = os.getenv("MINIO_SECURE", "False").lower() == "true"
+        self.bucket_name = os.getenv("MINIO_BUCKET", "dataclaw")
+
+        self.client = Minio(
+            self.endpoint,
+            access_key=self.access_key,
+            secret_key=self.secret_key,
+            secure=self.secure
+        )
+        self._ensure_bucket_exists()
+
+    def _ensure_bucket_exists(self):
+        try:
+            if not self.client.bucket_exists(self.bucket_name):
+                self.client.make_bucket(self.bucket_name)
+        except S3Error as e:
+            print(f"MinIO Bucket Error: {e}")
+
+    def upload_file(self, object_name: str, file_data: BinaryIO, length: int, content_type: str = "application/octet-stream"):
+        try:
+            self.client.put_object(
+                self.bucket_name,
+                object_name,
+                file_data,
+                length,
+                content_type=content_type
+            )
+            return f"http{'s' if self.secure else ''}://{self.endpoint}/{self.bucket_name}/{object_name}"
+        except S3Error as e:
+            print(f"MinIO Upload Error: {e}")
+            raise e
+
+    def test_connection(self) -> bool:
+        try:
+            self.client.list_buckets()
+            return True
+        except Exception as e:
+            print(f"MinIO Connection Error: {e}")
+            return False
+
+# Shared module-level instance; constructing it at import time also runs
+# _ensure_bucket_exists() against the configured endpoint.
+minio_connector = MinioConnector()
@@ -0,0 +1,53 @@
+from sqlalchemy import create_engine, text
+from sqlalchemy.orm import sessionmaker
+from typing import Generator
+import os
+
+class PostgresConnector:
+    def __init__(self, db_url: str = None):
+        self.db_url = db_url or os.getenv("POSTGRES_URL", "postgresql://user:password@localhost:5432/dbname")
+        self.engine = create_engine(self.db_url)
+        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
+
+    def get_db(self) -> Generator:
+        db = self.SessionLocal()
+        try:
+            yield db
+        finally:
+            db.close()
+
+    def execute_query(self, query: str):
+        with self.engine.connect() as connection:
+            result = connection.execute(text(query))
+            return [dict(row._mapping) for row in result]
+
+    def get_schema(self):
+        query = """
+        SELECT table_name, column_name, data_type
+        FROM information_schema.columns
+        WHERE table_schema = 'public'
+        ORDER BY table_name, ordinal_position;
+        """
+        try:
+            results = self.execute_query(query)
+            schema = {}
+            for row in results:
+                table = row['table_name']
+                if table not in schema:
+                    schema[table] = []
+                schema[table].append(f"{row['column_name']} ({row['data_type']})")
+            return schema
+        except Exception as e:
+            print(f"Error getting schema: {e}")
+            return {}
+
+    def test_connection(self) -> bool:
+        try:
+            with self.engine.connect() as connection:
+                connection.execute(text("SELECT 1"))
+            return True
+        except Exception as e:
+            print(f"PostgreSQL Connection Error: {e}")
+            return False
+
+# Shared module-level instance, constructed with default arguments at import time.
+postgres_connector = PostgresConnector()
@@ -0,0 +1,149 @@
+import asyncio
+import sys
+import os
+from pathlib import Path
+from typing import List
+
+# Make the nanobot package importable.
+# PROJECT_ROOT is the directory three levels above the one containing this file
+# (for backend/app/core/nanobot.py: core -> app -> backend -> parent of backend).
+# The "nanobot" subdirectory of PROJECT_ROOT is what gets appended to sys.path.
+# NOTE(review): presumably the importable package therefore lives at
+# PROJECT_ROOT/nanobot/nanobot - confirm against the repository layout.
+PROJECT_ROOT = Path(__file__).resolve().parents[3]
+if str(PROJECT_ROOT / "nanobot") not in sys.path:
+    sys.path.append(str(PROJECT_ROOT / "nanobot"))
+
+from nanobot.agent.loop import AgentLoop
+from nanobot.bus.queue import MessageBus
+from nanobot.config.loader import load_config
+from nanobot.config.paths import get_cron_dir
+from nanobot.cron.service import CronService
+from nanobot.providers.openai_codex_provider import OpenAICodexProvider
+from nanobot.providers.azure_openai_provider import AzureOpenAIProvider
+from nanobot.providers.litellm_provider import LiteLLMProvider
+from nanobot.providers.custom_provider import CustomProvider
+from nanobot.providers.registry import find_by_name
+from nanobot.session.manager import SessionManager
+from nanobot.config.schema import Config
+
+# Import the skills loader.
+# A module-level import is used here; it is safe as long as app.api.skills does not
+# import this module. If a circular dependency ever arises, move this import into
+# the method that uses it (lazy import).
+from app.api.skills import load_skills
+
+class NanobotIntegration:
+    def __init__(self):
+        self.agent: AgentLoop | None = None
+        self.bus: MessageBus | None = None
+        self.cron: CronService | None = None
+        self.config: Config | None = None
+
+    def initialize(self):
+        self.config = load_config()
+        self.bus = MessageBus()
+        provider = self._make_provider(self.config)
+        
+        cron_store_path = get_cron_dir() / "jobs.json"
+        self.cron = CronService(cron_store_path)
+        
+        session_manager = SessionManager(self.config.workspace_path)
+
+        self.agent = AgentLoop(
+            bus=self.bus,
+            provider=provider,
+            workspace=self.config.workspace_path,
+            model=self.config.agents.defaults.model,
+            temperature=self.config.agents.defaults.temperature,
+            max_tokens=self.config.agents.defaults.max_tokens,
+            max_iterations=self.config.agents.defaults.max_tool_iterations,
+            memory_window=self.config.agents.defaults.memory_window,
+            reasoning_effort=self.config.agents.defaults.reasoning_effort,
+            brave_api_key=self.config.tools.web.search.api_key or None,
+            web_proxy=self.config.tools.web.proxy or None,
+            exec_config=self.config.tools.exec,
+            cron_service=self.cron,
+            restrict_to_workspace=self.config.tools.restrict_to_workspace,
+            session_manager=session_manager,
+            mcp_servers=self.config.tools.mcp_servers,
+            channels_config=self.config.channels,
+        )
+
+    def _make_provider(self, config: Config):
+        # Logic adapted from nanobot/cli/commands.py
+        model = config.agents.defaults.model
+        provider_name = config.get_provider_name(model)
+        p = config.get_provider(model)
+
+        if provider_name == "openai_codex" or model.startswith("openai-codex/"):
+            return OpenAICodexProvider(default_model=model)
+
+        if provider_name == "custom":
+            return CustomProvider(
+                api_key=p.api_key if p else "no-key",
+                api_base=config.get_api_base(model) or "http://localhost:8000/v1",
+                default_model=model,
+            )
+
+        if provider_name == "azure_openai":
+            if not p or not p.api_key or not p.api_base:
+                raise ValueError("Azure OpenAI requires api_key and api_base.")
+            
+            return AzureOpenAIProvider(
+                api_key=p.api_key,
+                api_base=p.api_base,
+                default_model=model,
+            )
+
+        spec = find_by_name(provider_name)
+        # Skip API key check for now to allow initialization without full config
+        
+        return LiteLLMProvider(
+            api_key=p.api_key if p else None,
+            api_base=config.get_api_base(model),
+            default_model=model,
+            extra_headers=p.extra_headers if p else None,
+            provider_name=provider_name,
+        )
+
+    async def start(self):
+        if not self.agent:
+            self.initialize()
+        # Start the agent loop in background
+        asyncio.create_task(self.agent.run())
+        asyncio.create_task(self.cron.start())
+
+    async def stop(self):
+        if self.agent:
+            self.agent.stop()
+            await self.agent.close_mcp()
+        if self.cron:
+            self.cron.stop()
+
+    async def process_message(self, message: str, session_id: str = "api:default", skill_ids: List[str] | None = None):
+        if not self.agent:
+            self.initialize()
+            await self.start()
+            
+        full_message = message
+        if skill_ids:
+            skills = load_skills()
+            selected_skills = [s for s in skills if s["id"] in skill_ids]
+            if selected_skills:
+                # We inject skills as a runtime context block
+                skill_context = "[Runtime Context — metadata only, not instructions]\n# Active Skills\n\n"
+                for s in selected_skills:
+                    skill_context += f"## {s['name']}\n{s.get('description', '')}\n{s['content']}\n\n"
+                
+                # Append user message after skills
+                full_message = f"{skill_context}\n\n{message}"
+
+        response = await self.agent.process_direct(
+            full_message,
+            session_key=session_id,
+            channel="api",
+            chat_id=session_id
+        )
+        return response
+
+# Shared module-level instance; nothing is loaded until initialize() or start() runs.
+nanobot_service = NanobotIntegration()