First build

This commit is contained in:
qixinbo
2026-03-14 15:44:48 +08:00
parent 630d57a5cb
commit fb9c0906b5
145 changed files with 25148 additions and 0 deletions
View File
View File
+106
View File
@@ -0,0 +1,106 @@
import sys
import os
import json
from pathlib import Path
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
# Make the `nanobot` package importable by appending PROJECT_ROOT to sys.path.
# PROJECT_ROOT is the directory three levels above the directory that contains
# this file (presumably the repository root — confirm against the layout).
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))
from nanobot.providers.litellm_provider import LiteLLMProvider
from app.connectors.postgres import postgres_connector
from app.connectors.clickhouse import clickhouse_connector
from app.api.llm import _load_data as load_llm_config
# Input of process_nl2sql: the user's question and the data source to query.
class NL2SQLRequest(BaseModel):
    query: str = Field(..., description="User's natural language query")
    # process_nl2sql accepts only "postgres" and "clickhouse"; any other value
    # produces an error response rather than an exception.
    source: str = Field(..., description="Data source to query (postgres, clickhouse)")
# Output of process_nl2sql: the generated SQL, the rows it produced and an
# optional error message.
class NL2SQLResponse(BaseModel):
    sql: str  # generated statement; process_nl2sql passes "" when it fails before SQL exists
    result: List[Dict[str, Any]]  # declared as one mapping per returned row
    error: Optional[str] = None  # None unless process_nl2sql reports a failure
# Language tags that LLMs commonly place right after an opening Markdown fence.
_FENCE_LANGUAGE_TAGS = frozenset(
    {"sql", "postgresql", "postgres", "pgsql", "psql", "clickhouse", "mysql", "sqlite"}
)


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence and language tag."""
    cleaned = text.strip()
    had_opening_fence = cleaned.startswith("```")
    if had_opening_fence:
        cleaned = cleaned[3:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    if had_opening_fence:
        # Drop a leading language tag in any letter case ("sql", "SQL",
        # "postgresql", ...). Only known tags are removed so that a statement
        # starting directly after the fence keeps its first keyword.
        tokens = cleaned.split(None, 1)
        if tokens and tokens[0].lower() in _FENCE_LANGUAGE_TAGS:
            cleaned = tokens[1] if len(tokens) > 1 else ""
    return cleaned.strip()


async def process_nl2sql(request: NL2SQLRequest) -> NL2SQLResponse:
    """Translate a natural-language question into SQL and run it.

    Steps: pick the connector for ``request.source``, read its schema, ask the
    active LLM configuration for a SQL statement, execute that statement and
    return the rows as a list of dicts.

    Failures are never raised; they are reported through the ``error`` field of
    the returned ``NL2SQLResponse`` (with ``result`` set to ``[]``).

    NOTE(review): the statement is written by the LLM from user-controlled text
    and executed as-is. Nothing here restricts it to read-only SQL, so the
    database credentials should be read-only — confirm deployment settings.
    """
    # 1. Get the connector and schema
    connectors = {
        "postgres": postgres_connector,
        "clickhouse": clickhouse_connector,
    }
    connector = connectors.get(request.source)
    if connector is None:
        return NL2SQLResponse(sql="", result=[], error=f"Unsupported data source: {request.source}")

    if not connector.test_connection():
        return NL2SQLResponse(sql="", result=[], error=f"Failed to connect to {request.source}")

    schema_str = json.dumps(connector.get_schema(), indent=2)

    # 2. Get the active LLM config (the first entry flagged is_active wins)
    llm_configs = load_llm_config()
    active_config = next((c for c in llm_configs if c.get("is_active")), None)
    if not active_config:
        return NL2SQLResponse(sql="", result=[], error="No active LLM configuration found")

    # 3. Initialize Provider
    try:
        provider = LiteLLMProvider(
            api_key=active_config.get("api_key"),
            api_base=active_config.get("api_base"),
            default_model=active_config.get("model"),
            extra_headers=active_config.get("extra_headers"),
        )
    except Exception as e:
        return NL2SQLResponse(sql="", result=[], error=f"Failed to initialize LLM provider: {e}")

    # 4. Construct Prompt
    prompt = f"""You are an expert SQL generator.
Given the following database schema for a {request.source} database:
{schema_str}
Write a SQL query to answer the following question:
"{request.query}"
Return ONLY the SQL query. Do not include any markdown formatting, explanations, or code blocks. Just the raw SQL string.
"""

    # 5. Call LLM
    try:
        response = await provider.complete(prompt)
        # Models sometimes wrap the answer in a code fence despite the prompt.
        sql_query = _strip_code_fence(response)
    except Exception as e:
        return NL2SQLResponse(sql="", result=[], error=f"LLM generation failed: {e}")

    if not sql_query:
        # Nothing to execute; avoid sending an empty statement to the database.
        return NL2SQLResponse(sql="", result=[], error="LLM returned an empty SQL query")

    # 6. Execute SQL
    try:
        if request.source == "clickhouse":
            # The connector's execute_query returns bare tuples, which cannot
            # populate the List[Dict] response field. Ask the driver for column
            # metadata and build one dict per row instead.
            rows, column_types = connector.client.execute(sql_query, with_column_types=True)
            column_names = [name for name, _ in column_types]
            formatted_results = [dict(zip(column_names, row)) for row in rows]
        else:
            # The Postgres connector already returns a list of dicts.
            formatted_results = connector.execute_query(sql_query)
        return NL2SQLResponse(sql=sql_query, result=formatted_results)
    except Exception as e:
        return NL2SQLResponse(sql=sql_query, result=[], error=f"SQL execution failed: {e}")
View File
+96
View File
@@ -0,0 +1,96 @@
import json
import os
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, HTTPException, Body
from pydantic import BaseModel, Field
# Router that the LLM configuration endpoints below are registered on.
router = APIRouter()
# JSON file that stores the LLM configurations: "data/llm_config.json" under
# the directory two levels above the one containing this module.
DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "llm_config.json")
# Full LLM configuration record as stored in DATA_FILE and returned by the
# endpoints in this module.
class LLMConfig(BaseModel):
    id: str = Field(..., description="Unique identifier for the LLM configuration")
    provider: str = Field(..., description="Provider name (e.g., openai, azure, anthropic)")
    model: str = Field(..., description="Model name (e.g., gpt-4, claude-3-opus)")
    api_key: Optional[str] = Field(None, description="API Key for the provider")
    api_base: Optional[str] = Field(None, description="Base URL for the API")
    extra_headers: Optional[Dict[str, str]] = Field(None, description="Extra headers for the request")
    # Defaults to True, so a record created without this flag is active.
    is_active: bool = Field(True, description="Whether this configuration is active")
# Request body of the create endpoint; same fields and defaults as LLMConfig,
# declared without Field descriptions.
class LLMConfigCreate(BaseModel):
    id: str
    provider: str
    model: str
    api_key: Optional[str] = None
    api_base: Optional[str] = None
    extra_headers: Optional[Dict[str, str]] = None
    is_active: bool = True
# Request body of the update endpoint. Every field is optional; the update
# handler applies only the fields the client actually sent (exclude_unset).
# The id is not part of this model, so it cannot be changed through an update.
class LLMConfigUpdate(BaseModel):
    provider: Optional[str] = None
    model: Optional[str] = None
    api_key: Optional[str] = None
    api_base: Optional[str] = None
    extra_headers: Optional[Dict[str, str]] = None
    is_active: Optional[bool] = None
def _load_data() -> List[Dict[str, Any]]:
    """Read the stored LLM configuration records from DATA_FILE.

    Returns:
        The parsed list of records, or an empty list when the file does not
        exist, cannot be decoded/parsed as JSON, or holds something other than
        a JSON array.
    """
    try:
        # Open directly instead of checking os.path.exists first: one fewer
        # filesystem call and no gap between the check and the open.
        with open(DATA_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return []
    except (json.JSONDecodeError, UnicodeDecodeError):
        return []
    # Callers iterate the result and index each item by "id"; anything that is
    # not a list is treated like an unreadable file.
    return data if isinstance(data, list) else []
def _save_data(data: List[Dict[str, Any]]):
    """Persist *data* to DATA_FILE as indented JSON.

    The JSON is written to a sibling temporary file first and then moved over
    DATA_FILE with ``os.replace``. Opening DATA_FILE itself in "w" mode would
    truncate it before serialisation starts, so a failed dump (for example a
    non-serialisable value) would wipe every stored record.

    Raises:
        TypeError / ValueError: if *data* cannot be serialised to JSON.
        OSError: if the directory or file cannot be written.
    """
    os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
    tmp_path = f"{DATA_FILE}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        os.replace(tmp_path, DATA_FILE)
    except BaseException:
        # Leave the previous DATA_FILE untouched and remove the partial copy.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
@router.get("/llm", response_model=List[LLMConfig])
def list_llm_configs():
    """Return every stored LLM configuration as an LLMConfig model."""
    configs = []
    for record in _load_data():
        configs.append(LLMConfig(**record))
    return configs
@router.get("/llm/{config_id}", response_model=LLMConfig)
def get_llm_config(config_id: str):
    """Return the stored configuration whose id equals *config_id*.

    Raises:
        HTTPException: 404 when no stored record has that id.
    """
    # First record with a matching id, or None when there is none.
    match = next((record for record in _load_data() if record["id"] == config_id), None)
    if match is None:
        raise HTTPException(status_code=404, detail="LLM configuration not found")
    return LLMConfig(**match)
@router.post("/llm", response_model=LLMConfig)
def create_llm_config(config: LLMConfigCreate):
    """Store a new LLM configuration and return it.

    Raises:
        HTTPException: 400 when a record with the same id already exists.
    """
    stored = _load_data()
    for record in stored:
        if record["id"] == config.id:
            raise HTTPException(status_code=400, detail="LLM configuration with this ID already exists")

    payload = config.dict()
    stored.append(payload)
    _save_data(stored)
    return LLMConfig(**payload)
@router.put("/llm/{config_id}", response_model=LLMConfig)
def update_llm_config(config_id: str, config: LLMConfigUpdate):
    """Apply a partial update to the stored configuration *config_id*.

    Only the fields present in the request body are changed. The merged record
    is validated against LLMConfig *before* it is written, so an update such as
    ``{"model": null}`` is rejected instead of persisting a record that would
    make every later read of the store fail.

    Raises:
        HTTPException: 404 when no stored record has that id; 422 when the
            merged record is not a valid LLMConfig.
    """
    data = _load_data()
    for i, item in enumerate(data):
        if item["id"] != config_id:
            continue
        updated_item = {**item, **config.dict(exclude_unset=True)}
        try:
            validated = LLMConfig(**updated_item)
        except ValueError as e:
            # pydantic's ValidationError derives from ValueError.
            raise HTTPException(status_code=422, detail=str(e)) from e
        data[i] = updated_item
        _save_data(data)
        return validated
    raise HTTPException(status_code=404, detail="LLM configuration not found")
@router.delete("/llm/{config_id}")
def delete_llm_config(config_id: str):
    """Remove the stored configuration *config_id*.

    Raises:
        HTTPException: 404 when no stored record has that id.
    """
    stored = _load_data()
    remaining = [record for record in stored if record["id"] != config_id]
    # Same length means the filter removed nothing, i.e. the id is unknown.
    if len(remaining) == len(stored):
        raise HTTPException(status_code=404, detail="LLM configuration not found")
    _save_data(remaining)
    return {"message": "LLM configuration deleted successfully"}
+93
View File
@@ -0,0 +1,93 @@
import json
import os
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, Field
# Router that the skill endpoints below are registered on.
router = APIRouter()
# JSON file that stores the skills: "data/skills.json" under the directory two
# levels above the one containing this module.
DATA_FILE = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "skills.json")
# Full skill record as stored in DATA_FILE and returned by the endpoints in
# this module.
class Skill(BaseModel):
    id: str = Field(..., description="Unique identifier for the skill")
    name: str = Field(..., description="Name of the skill")
    description: Optional[str] = Field(None, description="Description of what the skill does")
    content: str = Field(..., description="The content/prompt/logic of the skill")
    # Free-form string; nothing in this model restricts it to the listed values.
    type: str = Field("python", description="Type of the skill (python, sql, api)")
# Request body of the create endpoint; same fields and defaults as Skill,
# declared without Field descriptions.
class SkillCreate(BaseModel):
    id: str
    name: str
    description: Optional[str] = None
    content: str
    type: str = "python"
# Request body of the update endpoint. Every field is optional; the update
# handler applies only the fields the client actually sent (exclude_unset).
# The id is not part of this model, so it cannot be changed through an update.
class SkillUpdate(BaseModel):
    name: Optional[str] = None
    description: Optional[str] = None
    content: Optional[str] = None
    type: Optional[str] = None
def _load_data() -> List[Dict[str, Any]]:
    """Read the stored skill records from DATA_FILE.

    Returns:
        The parsed list of records, or an empty list when the file does not
        exist, cannot be decoded/parsed as JSON, or holds something other than
        a JSON array.
    """
    try:
        # Open directly instead of checking os.path.exists first: one fewer
        # filesystem call and no gap between the check and the open.
        with open(DATA_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return []
    except (json.JSONDecodeError, UnicodeDecodeError):
        return []
    # Callers iterate the result and index each item by "id"; anything that is
    # not a list is treated like an unreadable file.
    return data if isinstance(data, list) else []
def _save_data(data: List[Dict[str, Any]]):
    """Persist *data* to DATA_FILE as indented JSON.

    The JSON is written to a sibling temporary file first and then moved over
    DATA_FILE with ``os.replace``. Opening DATA_FILE itself in "w" mode would
    truncate it before serialisation starts, so a failed dump (for example a
    non-serialisable value) would wipe every stored skill.

    Raises:
        TypeError / ValueError: if *data* cannot be serialised to JSON.
        OSError: if the directory or file cannot be written.
    """
    os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
    tmp_path = f"{DATA_FILE}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        os.replace(tmp_path, DATA_FILE)
    except BaseException:
        # Leave the previous DATA_FILE untouched and remove the partial copy.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
def load_skills() -> List[Dict[str, Any]]:
    """Return the stored skill records; public wrapper around _load_data."""
    return _load_data()
@router.get("/skills", response_model=List[Skill])
def list_skills():
    """Return every stored skill as a Skill model."""
    skills = []
    for record in load_skills():
        skills.append(Skill(**record))
    return skills
@router.get("/skills/{skill_id}", response_model=Skill)
def get_skill(skill_id: str):
    """Return the stored skill whose id equals *skill_id*.

    Raises:
        HTTPException: 404 when no stored record has that id.
    """
    # First record with a matching id, or None when there is none.
    match = next((record for record in _load_data() if record["id"] == skill_id), None)
    if match is None:
        raise HTTPException(status_code=404, detail="Skill not found")
    return Skill(**match)
@router.post("/skills", response_model=Skill)
def create_skill(skill: SkillCreate):
    """Store a new skill and return it.

    Raises:
        HTTPException: 400 when a record with the same id already exists.
    """
    stored = _load_data()
    for record in stored:
        if record["id"] == skill.id:
            raise HTTPException(status_code=400, detail="Skill with this ID already exists")

    payload = skill.dict()
    stored.append(payload)
    _save_data(stored)
    return Skill(**payload)
@router.put("/skills/{skill_id}", response_model=Skill)
def update_skill(skill_id: str, skill: SkillUpdate):
    """Apply a partial update to the stored skill *skill_id*.

    Only the fields present in the request body are changed. The merged record
    is validated against Skill *before* it is written, so an update such as
    ``{"content": null}`` is rejected instead of persisting a record that would
    make every later read of the store fail.

    Raises:
        HTTPException: 404 when no stored record has that id; 422 when the
            merged record is not a valid Skill.
    """
    data = _load_data()
    for i, item in enumerate(data):
        if item["id"] != skill_id:
            continue
        updated_item = {**item, **skill.dict(exclude_unset=True)}
        try:
            validated = Skill(**updated_item)
        except ValueError as e:
            # pydantic's ValidationError derives from ValueError.
            raise HTTPException(status_code=422, detail=str(e)) from e
        data[i] = updated_item
        _save_data(data)
        return validated
    raise HTTPException(status_code=404, detail="Skill not found")
@router.delete("/skills/{skill_id}")
def delete_skill(skill_id: str):
    """Remove the stored skill *skill_id*.

    Raises:
        HTTPException: 404 when no stored record has that id.
    """
    stored = _load_data()
    remaining = [record for record in stored if record["id"] != skill_id]
    # Same length means the filter removed nothing, i.e. the id is unknown.
    if len(remaining) == len(stored):
        raise HTTPException(status_code=404, detail="Skill not found")
    _save_data(remaining)
    return {"message": "Skill deleted successfully"}
+53
View File
@@ -0,0 +1,53 @@
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks
from app.connectors.minio import minio_connector
import pandas as pd
import duckdb
import io
import uuid
# Router that the CSV upload endpoint below is registered on.
router = APIRouter()
@router.post("/upload/csv")
async def upload_csv(file: UploadFile = File(...), background_tasks: BackgroundTasks = None):
    """Store an uploaded CSV in MinIO and return a quick structural summary.

    The file is uploaded under a UUID-prefixed object name, then parsed with
    pandas and described through an in-memory DuckDB connection.

    Returns:
        A dict with ``filename`` and ``url`` plus either ``rows``, ``columns``
        and ``summary``, or ``analysis_error`` when the upload succeeded but
        the analysis failed.

    Raises:
        HTTPException: 400 when the upload is not named ``*.csv``; 500 when
            reading or uploading the file fails.
    """
    # Keep only the final path component: the name is client-supplied and is
    # embedded in the object key. A missing filename becomes "" and is rejected
    # below instead of raising AttributeError.
    original_name = (file.filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    if not original_name.lower().endswith('.csv'):
        raise HTTPException(status_code=400, detail="Invalid file type. Only CSV allowed.")

    try:
        content = await file.read()
        file_size = len(content)
        file_obj = io.BytesIO(content)

        # Generate a unique filename
        unique_filename = f"{uuid.uuid4()}-{original_name}"

        # Upload to MinIO
        minio_url = minio_connector.upload_file(unique_filename, file_obj, file_size, content_type="text/csv")

        # Reset file pointer for analysis
        file_obj.seek(0)

        # Load into DuckDB (in-memory) for quick analysis
        try:
            df = pd.read_csv(file_obj)
            duckdb_conn = duckdb.connect(database=':memory:')
            try:
                duckdb_conn.register('uploaded_csv', df)
                summary = duckdb_conn.execute("DESCRIBE uploaded_csv").fetchall()
            finally:
                # Release the connection on every path, including failures.
                duckdb_conn.close()

            return {
                "filename": unique_filename,
                "url": minio_url,
                "rows": len(df),
                "columns": list(df.columns),
                "summary": str(summary)
            }
        except Exception as e:
            # The upload already succeeded; report the analysis failure
            # alongside the stored object's location.
            return {
                "filename": unique_filename,
                "url": minio_url,
                "analysis_error": str(e)
            }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
View File
+50
View File
@@ -0,0 +1,50 @@
from clickhouse_driver import Client
import os
class ClickHouseConnector:
    """Thin wrapper around a clickhouse_driver ``Client``.

    Connection settings come from the constructor arguments; any argument left
    unset (or falsy) falls back to the matching CLICKHOUSE_* environment
    variable and then to a built-in default.
    """

    def __init__(self, host: str = None, port: int = None, user: str = None, password: str = '', database: str = None):
        """Resolve the connection settings and create the client.

        ``port``, ``user`` and ``database`` default to None rather than to
        9000 / 'default' / 'default': with truthy defaults the ``or`` fallback
        never ran, so CLICKHOUSE_PORT, CLICKHOUSE_USER and CLICKHOUSE_DB were
        silently ignored. The effective values are unchanged when those
        variables are not set.
        """
        self.host = host or os.getenv("CLICKHOUSE_HOST", "localhost")
        self.port = port or int(os.getenv("CLICKHOUSE_PORT", 9000))
        self.user = user or os.getenv("CLICKHOUSE_USER", "default")
        self.password = password or os.getenv("CLICKHOUSE_PASSWORD", "")
        self.database = database or os.getenv("CLICKHOUSE_DB", "default")

        self.client = Client(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database
        )

    def execute_query(self, query: str):
        """Run *query* and return whatever ``client.execute`` returns.

        Errors are printed and re-raised to the caller.
        """
        try:
            return self.client.execute(query)
        except Exception as e:
            print(f"ClickHouse Query Error: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def get_schema(self):
        """Return ``{table: ["column (type)", ...]}`` for the current database.

        Returns an empty dict (after printing the error) when the lookup fails.
        """
        query = "SELECT table, name, type FROM system.columns WHERE database = currentDatabase()"
        try:
            results = self.client.execute(query)
            schema = {}
            for table, column, column_type in results:
                schema.setdefault(table, []).append(f"{column} ({column_type})")
            return schema
        except Exception as e:
            print(f"Error getting schema: {e}")
            return {}

    def test_connection(self) -> bool:
        """Return True when ``SELECT 1`` succeeds, False (after printing) otherwise."""
        try:
            self.client.execute("SELECT 1")
            return True
        except Exception as e:
            print(f"ClickHouse Connection Error: {e}")
            return False


# Shared module-level instance configured from the environment.
clickhouse_connector = ClickHouseConnector()
+51
View File
@@ -0,0 +1,51 @@
from minio import Minio
from minio.error import S3Error
import os
from typing import BinaryIO
class MinioConnector:
    """Wrapper around a ``Minio`` client bound to a single bucket.

    All settings are read from MINIO_* environment variables, with local
    development defaults.
    """

    def __init__(self):
        """Read the settings, create the client and try to ensure the bucket exists."""
        self.endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000")
        self.access_key = os.getenv("MINIO_ACCESS_KEY", "minioadmin")
        self.secret_key = os.getenv("MINIO_SECRET_KEY", "minioadmin")
        # Any value other than "true" (case-insensitive) means plain HTTP.
        self.secure = os.getenv("MINIO_SECURE", "False").lower() == "true"
        self.bucket_name = os.getenv("MINIO_BUCKET", "dataclaw")

        self.client = Minio(
            self.endpoint,
            access_key=self.access_key,
            secret_key=self.secret_key,
            secure=self.secure
        )
        self._ensure_bucket_exists()

    def _ensure_bucket_exists(self):
        """Create the configured bucket when it is missing (best effort).

        This runs from ``__init__``, and a module-level instance is created at
        import time. Every failure is therefore printed and swallowed — not
        only S3Error — so that an unreachable server cannot break importing
        the module. ``test_connection`` is the place to detect that state.
        """
        try:
            if not self.client.bucket_exists(self.bucket_name):
                self.client.make_bucket(self.bucket_name)
        except Exception as e:
            print(f"MinIO Bucket Error: {e}")

    def upload_file(self, object_name: str, file_data: BinaryIO, length: int, content_type: str = "application/octet-stream"):
        """Upload *file_data* as *object_name* and return the object's URL.

        The URL is assembled from the configured scheme, endpoint and bucket.

        Raises:
            S3Error: re-raised after being printed.
        """
        try:
            self.client.put_object(
                self.bucket_name,
                object_name,
                file_data,
                length,
                content_type=content_type
            )
            return f"http{'s' if self.secure else ''}://{self.endpoint}/{self.bucket_name}/{object_name}"
        except S3Error as e:
            print(f"MinIO Upload Error: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def test_connection(self) -> bool:
        """Return True when listing buckets succeeds, False (after printing) otherwise."""
        try:
            self.client.list_buckets()
            return True
        except Exception as e:
            print(f"MinIO Connection Error: {e}")
            return False


# Shared module-level instance configured from the environment.
minio_connector = MinioConnector()
+53
View File
@@ -0,0 +1,53 @@
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from typing import Generator
import os
class PostgresConnector:
    """SQLAlchemy-based access to a PostgreSQL database."""

    def __init__(self, db_url: str = None):
        """Create the engine and session factory.

        The URL comes from *db_url*, else from POSTGRES_URL, else from a
        placeholder local URL.
        """
        if not db_url:
            db_url = os.getenv("POSTGRES_URL", "postgresql://user:password@localhost:5432/dbname")
        self.db_url = db_url
        self.engine = create_engine(self.db_url)
        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)

    def get_db(self) -> Generator:
        """Yield a session and close it once the consumer is finished."""
        session = self.SessionLocal()
        try:
            yield session
        finally:
            session.close()

    def execute_query(self, query: str):
        """Run *query* on a fresh connection and return the rows as dicts."""
        with self.engine.connect() as conn:
            rows = conn.execute(text(query))
            records = []
            for row in rows:
                records.append(dict(row._mapping))
            return records

    def get_schema(self):
        """Return ``{table: ["column (type)", ...]}`` for the ``public`` schema.

        Returns an empty dict (after printing the error) when the lookup fails.
        """
        query = """
SELECT table_name, column_name, data_type
FROM information_schema.columns
WHERE table_schema = 'public'
ORDER BY table_name, ordinal_position;
"""
        try:
            columns_by_table = {}
            for record in self.execute_query(query):
                entry = f"{record['column_name']} ({record['data_type']})"
                columns_by_table.setdefault(record['table_name'], []).append(entry)
            return columns_by_table
        except Exception as e:
            print(f"Error getting schema: {e}")
            return {}

    def test_connection(self) -> bool:
        """Return True when ``SELECT 1`` succeeds, False (after printing) otherwise."""
        try:
            with self.engine.connect() as conn:
                conn.execute(text("SELECT 1"))
        except Exception as e:
            print(f"PostgreSQL Connection Error: {e}")
            return False
        return True


# Shared module-level instance configured from the environment.
postgres_connector = PostgresConnector()
View File
+149
View File
@@ -0,0 +1,149 @@
import asyncio
import sys
import os
from pathlib import Path
from typing import List
# Make the nanobot package importable.
# PROJECT_ROOT is the directory three levels above the directory that contains
# this file; its "nanobot" subdirectory is what gets appended to sys.path.
# NOTE(review): the imports below are written as `nanobot.<module>`, so this
# assumes the package itself lives at <PROJECT_ROOT>/nanobot/nanobot — confirm
# against the repository layout.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
if str(PROJECT_ROOT / "nanobot") not in sys.path:
    sys.path.append(str(PROJECT_ROOT / "nanobot"))
from nanobot.agent.loop import AgentLoop
from nanobot.bus.queue import MessageBus
from nanobot.config.loader import load_config
from nanobot.config.paths import get_cron_dir
from nanobot.cron.service import CronService
from nanobot.providers.openai_codex_provider import OpenAICodexProvider
from nanobot.providers.azure_openai_provider import AzureOpenAIProvider
from nanobot.providers.litellm_provider import LiteLLMProvider
from nanobot.providers.custom_provider import CustomProvider
from nanobot.providers.registry import find_by_name
from nanobot.session.manager import SessionManager
from nanobot.config.schema import Config
# Import skills loader
# We use a lazy import inside the method to avoid potential circular dependencies if any arise,
# or just import here if we are confident.
# Given the structure, importing here should be fine as long as skills.py doesn't import nanobot.py.
from app.api.skills import load_skills
class NanobotIntegration:
    """Owns the nanobot agent loop, message bus and cron service for the API."""

    def __init__(self):
        self.agent: AgentLoop | None = None
        self.bus: MessageBus | None = None
        self.cron: CronService | None = None
        self.config: Config | None = None
        # Strong references to the tasks created in start(). The event loop
        # keeps only weak references to tasks, so a task nobody else refers to
        # may be garbage-collected before it finishes.
        self._background_tasks: set = set()

    def initialize(self):
        """Load the configuration and build the bus, cron service and agent loop."""
        self.config = load_config()
        self.bus = MessageBus()
        provider = self._make_provider(self.config)

        cron_store_path = get_cron_dir() / "jobs.json"
        self.cron = CronService(cron_store_path)

        session_manager = SessionManager(self.config.workspace_path)

        self.agent = AgentLoop(
            bus=self.bus,
            provider=provider,
            workspace=self.config.workspace_path,
            model=self.config.agents.defaults.model,
            temperature=self.config.agents.defaults.temperature,
            max_tokens=self.config.agents.defaults.max_tokens,
            max_iterations=self.config.agents.defaults.max_tool_iterations,
            memory_window=self.config.agents.defaults.memory_window,
            reasoning_effort=self.config.agents.defaults.reasoning_effort,
            brave_api_key=self.config.tools.web.search.api_key or None,
            web_proxy=self.config.tools.web.proxy or None,
            exec_config=self.config.tools.exec,
            cron_service=self.cron,
            restrict_to_workspace=self.config.tools.restrict_to_workspace,
            session_manager=session_manager,
            mcp_servers=self.config.tools.mcp_servers,
            channels_config=self.config.channels,
        )

    def _make_provider(self, config: Config):
        """Build the LLM provider for the configured default model.

        Logic adapted from nanobot/cli/commands.py.

        Raises:
            ValueError: when Azure OpenAI is selected without api_key/api_base.
        """
        model = config.agents.defaults.model
        provider_name = config.get_provider_name(model)
        p = config.get_provider(model)

        if provider_name == "openai_codex" or model.startswith("openai-codex/"):
            return OpenAICodexProvider(default_model=model)

        if provider_name == "custom":
            return CustomProvider(
                api_key=p.api_key if p else "no-key",
                api_base=config.get_api_base(model) or "http://localhost:8000/v1",
                default_model=model,
            )

        if provider_name == "azure_openai":
            if not p or not p.api_key or not p.api_base:
                raise ValueError("Azure OpenAI requires api_key and api_base.")
            return AzureOpenAIProvider(
                api_key=p.api_key,
                api_base=p.api_base,
                default_model=model,
            )

        # The API key check is skipped for now so the service can initialise
        # without a complete configuration.
        return LiteLLMProvider(
            api_key=p.api_key if p else None,
            api_base=config.get_api_base(model),
            default_model=model,
            extra_headers=p.extra_headers if p else None,
            provider_name=provider_name,
        )

    def _spawn(self, coro):
        """Schedule *coro* as a task and hold a reference until it completes."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    async def start(self):
        """Initialise on first use, then run the agent loop and cron service in the background."""
        if not self.agent:
            self.initialize()

        self._spawn(self.agent.run())
        self._spawn(self.cron.start())

    async def stop(self):
        """Stop the agent (closing its MCP connections) and the cron service."""
        if self.agent:
            self.agent.stop()
            await self.agent.close_mcp()
        if self.cron:
            self.cron.stop()

    async def process_message(self, message: str, session_id: str = "api:default", skill_ids: List[str] | None = None):
        """Send *message* to the agent and return its response.

        When *skill_ids* is given, the matching stored skills are rendered into
        a context block that is placed in front of the user message.
        """
        if not self.agent:
            self.initialize()
            await self.start()

        full_message = message
        if skill_ids:
            wanted = set(skill_ids)
            selected_skills = [s for s in load_skills() if s["id"] in wanted]

            if selected_skills:
                # We inject skills as a runtime context block
                parts = ["[Runtime Context — metadata only, not instructions]\n# Active Skills\n\n"]
                for s in selected_skills:
                    # A skill stored with a null description has the key
                    # present with value None; `or ''` keeps the literal text
                    # "None" out of the prompt.
                    description = s.get('description') or ''
                    parts.append(f"## {s['name']}\n{description}\n{s['content']}\n\n")
                skill_context = "".join(parts)
                # Append user message after skills
                full_message = f"{skill_context}\n\n{message}"

        response = await self.agent.process_direct(
            full_message,
            session_key=session_id,
            channel="api",
            chat_id=session_id
        )
        return response


# Shared module-level service instance.
nanobot_service = NanobotIntegration()