From cdd29513d324c9f127416bda038342b4bfdd68cb Mon Sep 17 00:00:00 2001 From: xiamuceer Date: Tue, 25 Nov 2025 15:30:44 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=20SQLite=20+=20?= =?UTF-8?q?=E7=A6=BB=E7=BA=BF=E9=83=A8=E7=BD=B2=20+=20MCP=20=E5=90=AF?= =?UTF-8?q?=E5=8A=A8=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 6 ++ backend/app/config.py | 2 +- backend/app/database.py | 115 ++++++++++++++++--------- backend/app/mcp/registry.py | 32 ++++--- backend/app/services/memory_service.py | 115 ++++++++++++++++++++----- backend/app/services/oauth_service.py | 11 ++- backend/requirements.txt | 1 + 7 files changed, 203 insertions(+), 79 deletions(-) diff --git a/.gitignore b/.gitignore index b3238d5..1fa8cde 100644 --- a/.gitignore +++ b/.gitignore @@ -102,6 +102,12 @@ dmypy.json # Jupyter Notebook .ipynb_checkpoints +#build +BUILD_GUIDE.md +launcher.py +launcher.spec + + data/ docs/ data_old/ diff --git a/backend/app/config.py b/backend/app/config.py index c41817a..6d6a43a 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -75,7 +75,7 @@ class Settings(BaseSettings): default_ai_provider: str = "openai" default_model: str = "gpt-4" default_temperature: float = 0.7 - default_max_tokens: int = 2000 + default_max_tokens: int = 32000 # MCP适配器配置 enable_mcp_adapter: bool = True # 是否启用MCP适配器(自动检测API能力) diff --git a/backend/app/database.py b/backend/app/database.py index 8064f6d..ede79d8 100644 --- a/backend/app/database.py +++ b/backend/app/database.py @@ -60,52 +60,83 @@ async def get_engine(user_id: str): async with _cache_lock: if cache_key not in _engine_cache: - # 优化后的PostgreSQL连接配置 - connect_args = { - "server_settings": { - "application_name": settings.app_name, - "jit": "off", # 关闭JIT以提高短查询性能 - }, - "command_timeout": 60, # 命令超时60秒 - "statement_cache_size": 500, # 启用语句缓存,提升重复查询性能 + # 检测数据库类型 + is_sqlite = 'sqlite' in settings.database_url.lower() + + # 基础引擎参数 + engine_args = { + "echo": settings.database_echo_pool, + "echo_pool": settings.database_echo_pool, + "future": True, } - engine = create_async_engine( - settings.database_url, - echo=settings.database_echo_pool, # 根据配置决定是否输出连接池日志 - echo_pool=settings.database_echo_pool, # 连接池操作日志 - future=True, - pool_size=settings.database_pool_size, # 核心连接数:50(优化后) - max_overflow=settings.database_max_overflow, # 溢出连接数:30(优化后) - pool_timeout=settings.database_pool_timeout, # 连接超时:90秒(优化后) - pool_pre_ping=settings.database_pool_pre_ping, # 连接前检测 - pool_recycle=settings.database_pool_recycle, # 连接回收:1800秒 - pool_use_lifo=settings.database_pool_use_lifo, # LIFO策略提高复用 - pool_reset_on_return=settings.database_pool_reset_on_return, # 连接归还时重置 - max_identifier_length=settings.database_max_identifier_length, # 标识符最大长度 - connect_args=connect_args - ) + if is_sqlite: + # SQLite 配置(使用 NullPool,不支持连接池参数) + engine_args["connect_args"] = { + "check_same_thread": False, + "timeout": 30.0, # 等待锁释放的超时时间(秒) + } + # 启用连接前检测以支持更好的并发 + engine_args["pool_pre_ping"] = True + + logger.info("📊 使用 SQLite 数据库(NullPool,超时30秒,WAL模式)") + else: + # PostgreSQL 配置(完整连接池支持) + connect_args = { + "server_settings": { + "application_name": settings.app_name, + "jit": "off", + }, + "command_timeout": 60, + "statement_cache_size": 500, + } + + engine_args.update({ + "pool_size": settings.database_pool_size, + "max_overflow": settings.database_max_overflow, + "pool_timeout": settings.database_pool_timeout, + "pool_pre_ping": settings.database_pool_pre_ping, + "pool_recycle": settings.database_pool_recycle, + "pool_use_lifo": settings.database_pool_use_lifo, + "pool_reset_on_return": settings.database_pool_reset_on_return, + "max_identifier_length": settings.database_max_identifier_length, + "connect_args": connect_args + }) + + total_connections = settings.database_pool_size + settings.database_max_overflow + estimated_concurrent_users = total_connections * 2 + + logger.info( + f"📊 PostgreSQL 连接池配置:\n" + f" ├─ 核心连接: {settings.database_pool_size}\n" + f" ├─ 溢出连接: {settings.database_max_overflow}\n" + f" ├─ 总连接数: {total_connections}\n" + f" ├─ 获取超时: {settings.database_pool_timeout}秒\n" + f" ├─ 连接回收: {settings.database_pool_recycle}秒\n" + f" └─ 预估并发: {estimated_concurrent_users}+用户" + ) + + engine = create_async_engine(settings.database_url, **engine_args) _engine_cache[cache_key] = engine - # 计算总连接数和预估并发能力 - total_connections = settings.database_pool_size + settings.database_max_overflow - estimated_concurrent_users = total_connections * 2 # 每个用户平均0.5个连接 - - logger.info( - f" \n" - f" ├─ 连接池配置:\n" - f" │ ├─ 核心连接: {settings.database_pool_size}\n" - f" │ ├─ 溢出连接: {settings.database_max_overflow}\n" - f" │ └─ 总连接数: {total_connections}\n" - f" ├─ 超时配置:\n" - f" │ ├─ 获取超时: {settings.database_pool_timeout}秒\n" - f" │ └─ 连接回收: {settings.database_pool_recycle}秒 ({settings.database_pool_recycle//60}分钟)\n" - f" ├─ 优化策略:\n" - f" │ ├─ 复用策略: LIFO(后进先出)\n" - f" │ ├─ 健康检查: Pre-ping enabled\n" - f" │ └─ 归还重置: {settings.database_pool_reset_on_return}\n" - f" └─ 预估并发: {estimated_concurrent_users}-{estimated_concurrent_users + 50}用户" - ) + # 如果是 SQLite,启用 WAL 模式以支持读写并发 + if is_sqlite: + try: + from sqlalchemy import event + from sqlalchemy.pool import NullPool + + @event.listens_for(engine.sync_engine, "connect") + def set_sqlite_pragma(dbapi_conn, connection_record): + cursor = dbapi_conn.cursor() + cursor.execute("PRAGMA journal_mode=WAL") + cursor.execute("PRAGMA synchronous=NORMAL") + cursor.execute("PRAGMA cache_size=-64000") # 64MB 缓存 + cursor.execute("PRAGMA busy_timeout=30000") # 30秒超时 + cursor.close() + + logger.info("✅ SQLite WAL 模式已启用(支持读写并发)") + except Exception as e: + logger.warning(f"⚠️ 启用 WAL 模式失败: {e},使用默认配置") return _engine_cache[cache_key] diff --git a/backend/app/mcp/registry.py b/backend/app/mcp/registry.py index f105894..e9a4b9d 100644 --- a/backend/app/mcp/registry.py +++ b/backend/app/mcp/registry.py @@ -55,17 +55,26 @@ class MCPPluginRegistry: # 启动后台清理任务 self._cleanup_task = None self._health_check_task = None - self._start_background_tasks() + self._tasks_started = False - def _start_background_tasks(self): - """启动后台任务""" - if self._cleanup_task is None: - self._cleanup_task = asyncio.create_task(self._cleanup_loop()) - logger.info("✅ MCP插件注册表后台清理任务已启动") - - if self._health_check_task is None: - self._health_check_task = asyncio.create_task(self._health_check_loop()) - logger.info("✅ MCP会话健康检查任务已启动") + def _ensure_background_tasks(self): + """确保后台任务已启动(延迟初始化)""" + if not self._tasks_started: + try: + # 检查是否有运行中的事件循环 + loop = asyncio.get_running_loop() + if self._cleanup_task is None: + self._cleanup_task = asyncio.create_task(self._cleanup_loop()) + logger.info("✅ MCP插件注册表后台清理任务已启动") + + if self._health_check_task is None: + self._health_check_task = asyncio.create_task(self._health_check_loop()) + logger.info("✅ MCP会话健康检查任务已启动") + + self._tasks_started = True + except RuntimeError: + # 没有运行中的事件循环,稍后再试 + pass async def _cleanup_loop(self): """后台清理过期客户端""" @@ -201,6 +210,9 @@ class MCPPluginRegistry: Returns: 是否加载成功 """ + # 确保后台任务已启动 + self._ensure_background_tasks() + # 使用细粒度锁(只锁定当前用户) user_lock = await self._get_user_lock(plugin.user_id) async with user_lock: diff --git a/backend/app/services/memory_service.py b/backend/app/services/memory_service.py index a467385..7ac3345 100644 --- a/backend/app/services/memory_service.py +++ b/backend/app/services/memory_service.py @@ -10,10 +10,29 @@ import hashlib logger = get_logger(__name__) -# 配置模型缓存目录(不设置离线模式,让它自动选择) -# 如果本地有模型就用本地的,没有才联网下载 +# 配置模型缓存目录 +# 优先使用 backend/embedding 目录(打包后的实际位置) +import sys +from pathlib import Path + if 'SENTENCE_TRANSFORMERS_HOME' not in os.environ: - os.environ['SENTENCE_TRANSFORMERS_HOME'] = 'embedding' + # 根据运行环境确定模型目录 + if getattr(sys, 'frozen', False): + # PyInstaller 打包后 + base_dir = Path(sys.executable).parent + else: + # 开发模式,从当前文件位置向上找到项目根目录 + base_dir = Path(__file__).parent.parent.parent + + model_dir = base_dir / 'backend' / 'embedding' + if model_dir.exists(): + os.environ['SENTENCE_TRANSFORMERS_HOME'] = str(model_dir) + logger.info(f"🔧 设置模型目录: {model_dir}") + else: + # 降级到项目根目录的 embedding + fallback_dir = base_dir / 'embedding' + os.environ['SENTENCE_TRANSFORMERS_HOME'] = str(fallback_dir) + logger.info(f"🔧 使用降级模型目录: {fallback_dir}") class MemoryService: @@ -44,9 +63,10 @@ class MemoryService: # 初始化多语言embedding模型(支持中文) logger.info("🔄 正在加载Embedding模型...") - # 确保模型缓存目录存在 - model_cache_dir = 'embedding' + # 使用环境变量中配置的模型目录 + model_cache_dir = os.environ.get('SENTENCE_TRANSFORMERS_HOME', 'embedding') os.makedirs(model_cache_dir, exist_ok=True) + logger.info(f"📂 使用模型缓存目录: {os.path.abspath(model_cache_dir)}") # 调试信息:打印环境变量和路径 logger.info(f"📂 当前工作目录: {os.getcwd()}") @@ -56,40 +76,91 @@ class MemoryService: logger.info(f"🔧 HF_HUB_OFFLINE: {os.environ.get('HF_HUB_OFFLINE', '未设置')}") # 检查模型目录内容 - if os.path.exists(model_cache_dir): + abs_cache_dir = os.path.abspath(model_cache_dir) + logger.info(f"📂 检查模型缓存目录: {abs_cache_dir}") + + if os.path.exists(abs_cache_dir): logger.info(f"📁 模型目录存在,检查内容...") try: - items = os.listdir(model_cache_dir) - logger.info(f"📁 模型目录内容: {items}") + items = os.listdir(abs_cache_dir) + logger.info(f"📁 模型目录内容 ({len(items)} 项): {items}") # 检查是否有预期的模型文件夹 - expected_model_dir = os.path.join(model_cache_dir, 'models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2') + expected_model_dir = os.path.join(abs_cache_dir, 'models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2') + logger.info(f"🔍 检查预期路径: {expected_model_dir}") + if os.path.exists(expected_model_dir): - logger.info(f"✅ 找到本地模型目录: {expected_model_dir}") + logger.info(f"✅ 找到本地模型目录!") # 检查快照目录 snapshots_dir = os.path.join(expected_model_dir, 'snapshots') if os.path.exists(snapshots_dir): snapshots = os.listdir(snapshots_dir) - logger.info(f"📁 模型快照: {snapshots}") + logger.info(f"📁 模型快照 ({len(snapshots)} 个): {snapshots}") + # 检查是否有有效的快照 + if snapshots: + logger.info(f"✅ 发现有效快照,可以使用离线模式") else: - logger.warning(f"⚠️ 未找到本地模型目录: {expected_model_dir}") + logger.warning(f"⚠️ 未找到本地模型目录") + logger.warning(f" 预期位置: {expected_model_dir}") except Exception as e: logger.error(f"❌ 检查模型目录失败: {str(e)}") + import traceback + logger.error(f" 堆栈: {traceback.format_exc()}") else: - logger.warning(f"⚠️ 模型目录不存在: {os.path.abspath(model_cache_dir)}") + logger.warning(f"⚠️ 模型目录不存在: {abs_cache_dir}") try: logger.info("🔄 尝试加载主模型: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") - # 优先使用本地缓存的模型 - # cache_folder会让模型优先从本地加载,只有不存在时才联网下载 - # 注意:不要设置local_files_only=True,这会阻止fallback到联网下载 - self.embedding_model = SentenceTransformer( - 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', - cache_folder=model_cache_dir, - device='cpu', # 明确指定使用CPU - trust_remote_code=False, # 安全起见 + + # 使用绝对路径检查本地模型 + abs_cache_dir = os.path.abspath(model_cache_dir) + local_model_path = os.path.join( + abs_cache_dir, + 'models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2' ) - logger.info("✅ Embedding模型加载成功 (paraphrase-multilingual-MiniLM-L12-v2)") + + logger.info(f"🔍 检查本地模型路径: {local_model_path}") + logger.info(f"🔍 路径存在检查: {os.path.exists(local_model_path)}") + + # 检查快照目录是否存在且有内容 + snapshots_dir = os.path.join(local_model_path, 'snapshots') + has_valid_model = False + if os.path.exists(snapshots_dir): + try: + snapshots = os.listdir(snapshots_dir) + if snapshots: + logger.info(f"✅ 发现本地模型快照: {snapshots}") + has_valid_model = True + except Exception as e: + logger.warning(f"⚠️ 检查快照失败: {e}") + + # 优先尝试从本地路径加载 + if has_valid_model: + logger.info(f"✅ 检测到完整本地模型,使用离线模式加载") + try: + self.embedding_model = SentenceTransformer( + 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', + cache_folder=abs_cache_dir, + device='cpu', + trust_remote_code=True, + local_files_only=True # 强制使用本地文件 + ) + logger.info("✅ Embedding模型加载成功 (离线模式)") + except Exception as local_err: + logger.warning(f"⚠️ 离线模式加载失败: {str(local_err)}") + logger.info("🔄 尝试在线模式...") + raise local_err + else: + logger.info("📥 本地模型不完整或不存在,将联网下载...") + logger.info(f" 下载后将保存到: {abs_cache_dir}") + self.embedding_model = SentenceTransformer( + 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', + cache_folder=abs_cache_dir, + device='cpu', + trust_remote_code=True, + local_files_only=False # 允许联网下载 + ) + logger.info("✅ Embedding模型加载成功 (在线下载)") except Exception as e: logger.warning(f"⚠️ 无法加载多语言模型: {str(e)}") logger.error(f"❌ 详细错误: {repr(e)}") diff --git a/backend/app/services/oauth_service.py b/backend/app/services/oauth_service.py index 4578029..9565a84 100644 --- a/backend/app/services/oauth_service.py +++ b/backend/app/services/oauth_service.py @@ -20,11 +20,14 @@ class LinuxDOOAuthService: self.client_secret = settings.LINUXDO_CLIENT_SECRET self.redirect_uri = settings.LINUXDO_REDIRECT_URI - # 验证redirect_uri配置 + # 如果未配置,使用默认值(本地开发) if not self.redirect_uri: - raise ValueError( - "LINUXDO_REDIRECT_URI 未配置!\n" - "请在 .env 文件中设置正确的回调地址:\n" + self.redirect_uri = "http://localhost:8000/api/auth/callback" + import logging + logger = logging.getLogger(__name__) + logger.warning( + "⚠️ LINUXDO_REDIRECT_URI 未配置,使用默认值: http://localhost:8000/api/auth/callback\n" + "如需使用 OAuth 登录,请在 .env 文件中配置:\n" "本地开发: LINUXDO_REDIRECT_URI=http://localhost:8000/api/auth/callback\n" "Docker部署: LINUXDO_REDIRECT_URI=https://your-domain.com/api/auth/callback" ) diff --git a/backend/requirements.txt b/backend/requirements.txt index d268955..bef0b5f 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -19,6 +19,7 @@ anthropic==0.72.0 # 工具库 httpx==0.28.1 python-dotenv==1.0.0 +psutil==6.1.1 # MCP官方库(Model Context Protocol Python SDK) mcp==1.21.0