fix:优化章节分析并发问题

2025-11-05 00:11:27 +08:00
parent e62286eab1
commit 7e9781477b
5 changed files with 478 additions and 340 deletions
@@ -7,6 +7,7 @@ import json
 import asyncio
 from typing import Optional
 from datetime import datetime
+from asyncio import Queue, Lock

 from app.database import get_db
 from app.models.chapter import Chapter
@@ -34,6 +35,17 @@ from app.api.settings import get_user_ai_service
 router = APIRouter(prefix="/chapters", tags=["章节管理"])
 logger = get_logger(__name__)

+# 全局数据库写入锁（每个用户一个锁，用于保护SQLite写入操作）
+db_write_locks: dict[str, Lock] = {}
+
+
+async def get_db_write_lock(user_id: str) -> Lock:
+    """获取或创建用户的数据库写入锁"""
+    if user_id not in db_write_locks:
+        db_write_locks[user_id] = Lock()
+        logger.debug(f"🔒 为用户 {user_id} 创建数据库写入锁")
+    return db_write_locks[user_id]
+

@router.post("", response_model=ChapterResponse, summary="创建章节")
 async def create_chapter(
@@ -318,7 +330,7 @@ async def analyze_chapter_background(
    ai_service: AIService
 ):
    """
-    后台异步分析章节
+    后台异步分析章节（支持并发，使用锁保护数据库写入）
    
    Args:
        chapter_id: 章节ID
@@ -328,11 +340,10 @@ async def analyze_chapter_background(
        ai_service: AI服务实例
    """
    db_session = None
+    write_lock = await get_db_write_lock(user_id)
+    
    try:
-        logger.info(f"🔍 开始后台分析章节: {chapter_id}")
-        
-        # 等待一小段时间，确保主会话的commit已经持久化到磁盘
-        await asyncio.sleep(0.1)
+        logger.info(f"🔍 开始分析章节: {chapter_id}, 任务ID: {task_id}")
        
        # 创建独立数据库会话
        from app.database import get_engine
@@ -346,43 +357,40 @@ async def analyze_chapter_background(
        )
        db_session = AsyncSessionLocal()
        
-        # 1. 获取任务（添加重试逻辑）
-        task = None
-        for retry in range(3):
-            task_result = await db_session.execute(
-                select(AnalysisTask).where(AnalysisTask.id == task_id)
-            )
-            task = task_result.scalar_one_or_none()
-            if task:
-                break
-            if retry < 2:
-                logger.warning(f"⚠️ 第{retry+1}次未找到任务 {task_id}，等待后重试...")
-                await asyncio.sleep(0.2)
+        # 1. 获取任务（读操作）
+        task_result = await db_session.execute(
+            select(AnalysisTask).where(AnalysisTask.id == task_id)
+        )
+        task = task_result.scalar_one_or_none()
        
        if not task:
            logger.error(f"❌ 任务不存在: {task_id}")
            return
        
-        task.status = 'running'
-        task.started_at = datetime.now()
-        task.progress = 10
-        await db_session.commit()
+        # 更新任务状态（写操作，需要锁）
+        async with write_lock:
+            task.status = 'running'
+            task.started_at = datetime.now()
+            task.progress = 10
+            await db_session.commit()
        
-        # 2. 获取章节信息
+        # 2. 获取章节信息（读操作）
        chapter_result = await db_session.execute(
            select(Chapter).where(Chapter.id == chapter_id)
        )
        chapter = chapter_result.scalar_one_or_none()
        if not chapter or not chapter.content:
-            task.status = 'failed'
-            task.error_message = '章节不存在或内容为空'
-            task.completed_at = datetime.now()
-            await db_session.commit()
+            async with write_lock:
+                task.status = 'failed'
+                task.error_message = '章节不存在或内容为空'
+                task.completed_at = datetime.now()
+                await db_session.commit()
            logger.error(f"❌ 章节不存在或内容为空: {chapter_id}")
            return
        
-        task.progress = 20
-        await db_session.commit()
+        async with write_lock:
+            task.progress = 20
+            await db_session.commit()
        
        # 3. 使用PlotAnalyzer分析章节
        analyzer = PlotAnalyzer(ai_service)
@@ -394,84 +402,87 @@ async def analyze_chapter_background(
        )
        
        if not analysis_result:
-            task.status = 'failed'
-            task.error_message = 'AI分析失败，请检查日志'
-            task.completed_at = datetime.now()
-            await db_session.commit()
+            async with write_lock:
+                task.status = 'failed'
+                task.error_message = 'AI分析失败，请检查日志'
+                task.completed_at = datetime.now()
+                await db_session.commit()
            logger.error(f"❌ AI分析失败: {chapter_id}")
            return
        
-        task.progress = 60
-        await db_session.commit()
+        async with write_lock:
+            task.progress = 60
+            await db_session.commit()
        
-        # 4. 保存分析结果到数据库（先检查是否已存在）
-        existing_analysis_result = await db_session.execute(
-            select(PlotAnalysis).where(PlotAnalysis.chapter_id == chapter_id)
-        )
-        existing_analysis = existing_analysis_result.scalar_one_or_none()
-        
-        if existing_analysis:
-            # 更新现有记录
-            logger.info(f"  更新现有分析记录: {existing_analysis.id}")
-            existing_analysis.plot_stage = analysis_result.get('plot_stage', '发展')
-            existing_analysis.conflict_level = analysis_result.get('conflict', {}).get('level', 0)
-            existing_analysis.conflict_types = analysis_result.get('conflict', {}).get('types', [])
-            existing_analysis.emotional_tone = analysis_result.get('emotional_arc', {}).get('primary_emotion', '')
-            existing_analysis.emotional_intensity = analysis_result.get('emotional_arc', {}).get('intensity', 0) / 10.0
-            existing_analysis.hooks = analysis_result.get('hooks', [])
-            existing_analysis.hooks_count = len(analysis_result.get('hooks', []))
-            existing_analysis.foreshadows = analysis_result.get('foreshadows', [])
-            existing_analysis.foreshadows_planted = sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'planted')
-            existing_analysis.foreshadows_resolved = sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'resolved')
-            existing_analysis.plot_points = analysis_result.get('plot_points', [])
-            existing_analysis.plot_points_count = len(analysis_result.get('plot_points', []))
-            existing_analysis.character_states = analysis_result.get('character_states', [])
-            existing_analysis.scenes = analysis_result.get('scenes', [])
-            existing_analysis.pacing = analysis_result.get('pacing', 'moderate')
-            existing_analysis.overall_quality_score = analysis_result.get('scores', {}).get('overall', 0)
-            existing_analysis.pacing_score = analysis_result.get('scores', {}).get('pacing', 0)
-            existing_analysis.engagement_score = analysis_result.get('scores', {}).get('engagement', 0)
-            existing_analysis.coherence_score = analysis_result.get('scores', {}).get('coherence', 0)
-            existing_analysis.analysis_report = analyzer.generate_analysis_summary(analysis_result)
-            existing_analysis.suggestions = analysis_result.get('suggestions', [])
-            existing_analysis.dialogue_ratio = analysis_result.get('dialogue_ratio', 0)
-            existing_analysis.description_ratio = analysis_result.get('description_ratio', 0)
-        else:
-            # 创建新记录
-            logger.info(f"  创建新的分析记录")
-            plot_analysis = PlotAnalysis(
-                chapter_id=chapter_id,
-                project_id=project_id,
-                plot_stage=analysis_result.get('plot_stage', '发展'),
-                conflict_level=analysis_result.get('conflict', {}).get('level', 0),
-                conflict_types=analysis_result.get('conflict', {}).get('types', []),
-                emotional_tone=analysis_result.get('emotional_arc', {}).get('primary_emotion', ''),
-                emotional_intensity=analysis_result.get('emotional_arc', {}).get('intensity', 0) / 10.0,
-                hooks=analysis_result.get('hooks', []),
-                hooks_count=len(analysis_result.get('hooks', [])),
-                foreshadows=analysis_result.get('foreshadows', []),
-                foreshadows_planted=sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'planted'),
-                foreshadows_resolved=sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'resolved'),
-                plot_points=analysis_result.get('plot_points', []),
-                plot_points_count=len(analysis_result.get('plot_points', [])),
-                character_states=analysis_result.get('character_states', []),
-                scenes=analysis_result.get('scenes', []),
-                pacing=analysis_result.get('pacing', 'moderate'),
-                overall_quality_score=analysis_result.get('scores', {}).get('overall', 0),
-                pacing_score=analysis_result.get('scores', {}).get('pacing', 0),
-                engagement_score=analysis_result.get('scores', {}).get('engagement', 0),
-                coherence_score=analysis_result.get('scores', {}).get('coherence', 0),
-                analysis_report=analyzer.generate_analysis_summary(analysis_result),
-                suggestions=analysis_result.get('suggestions', []),
-                dialogue_ratio=analysis_result.get('dialogue_ratio', 0),
-                description_ratio=analysis_result.get('description_ratio', 0)
+        # 4. 保存分析结果到数据库（写操作，需要锁）
+        async with write_lock:
+            existing_analysis_result = await db_session.execute(
+                select(PlotAnalysis).where(PlotAnalysis.chapter_id == chapter_id)
            )
-            db_session.add(plot_analysis)
-        
-        await db_session.commit()
-        
-        task.progress = 80
-        await db_session.commit()
+            existing_analysis = existing_analysis_result.scalar_one_or_none()
+            
+            if existing_analysis:
+                # 更新现有记录
+                logger.info(f"  更新现有分析记录: {existing_analysis.id}")
+                existing_analysis.plot_stage = analysis_result.get('plot_stage', '发展')
+                existing_analysis.conflict_level = analysis_result.get('conflict', {}).get('level', 0)
+                existing_analysis.conflict_types = analysis_result.get('conflict', {}).get('types', [])
+                existing_analysis.emotional_tone = analysis_result.get('emotional_arc', {}).get('primary_emotion', '')
+                existing_analysis.emotional_intensity = analysis_result.get('emotional_arc', {}).get('intensity', 0) / 10.0
+                existing_analysis.hooks = analysis_result.get('hooks', [])
+                existing_analysis.hooks_count = len(analysis_result.get('hooks', []))
+                existing_analysis.foreshadows = analysis_result.get('foreshadows', [])
+                existing_analysis.foreshadows_planted = sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'planted')
+                existing_analysis.foreshadows_resolved = sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'resolved')
+                existing_analysis.plot_points = analysis_result.get('plot_points', [])
+                existing_analysis.plot_points_count = len(analysis_result.get('plot_points', []))
+                existing_analysis.character_states = analysis_result.get('character_states', [])
+                existing_analysis.scenes = analysis_result.get('scenes', [])
+                existing_analysis.pacing = analysis_result.get('pacing', 'moderate')
+                existing_analysis.overall_quality_score = analysis_result.get('scores', {}).get('overall', 0)
+                existing_analysis.pacing_score = analysis_result.get('scores', {}).get('pacing', 0)
+                existing_analysis.engagement_score = analysis_result.get('scores', {}).get('engagement', 0)
+                existing_analysis.coherence_score = analysis_result.get('scores', {}).get('coherence', 0)
+                existing_analysis.analysis_report = analyzer.generate_analysis_summary(analysis_result)
+                existing_analysis.suggestions = analysis_result.get('suggestions', [])
+                existing_analysis.dialogue_ratio = analysis_result.get('dialogue_ratio', 0)
+                existing_analysis.description_ratio = analysis_result.get('description_ratio', 0)
+            else:
+                # 创建新记录
+                logger.info(f"  创建新的分析记录")
+                plot_analysis = PlotAnalysis(
+                    chapter_id=chapter_id,
+                    project_id=project_id,
+                    plot_stage=analysis_result.get('plot_stage', '发展'),
+                    conflict_level=analysis_result.get('conflict', {}).get('level', 0),
+                    conflict_types=analysis_result.get('conflict', {}).get('types', []),
+                    emotional_tone=analysis_result.get('emotional_arc', {}).get('primary_emotion', ''),
+                    emotional_intensity=analysis_result.get('emotional_arc', {}).get('intensity', 0) / 10.0,
+                    hooks=analysis_result.get('hooks', []),
+                    hooks_count=len(analysis_result.get('hooks', [])),
+                    foreshadows=analysis_result.get('foreshadows', []),
+                    foreshadows_planted=sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'planted'),
+                    foreshadows_resolved=sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'resolved'),
+                    plot_points=analysis_result.get('plot_points', []),
+                    plot_points_count=len(analysis_result.get('plot_points', [])),
+                    character_states=analysis_result.get('character_states', []),
+                    scenes=analysis_result.get('scenes', []),
+                    pacing=analysis_result.get('pacing', 'moderate'),
+                    overall_quality_score=analysis_result.get('scores', {}).get('overall', 0),
+                    pacing_score=analysis_result.get('scores', {}).get('pacing', 0),
+                    engagement_score=analysis_result.get('scores', {}).get('engagement', 0),
+                    coherence_score=analysis_result.get('scores', {}).get('coherence', 0),
+                    analysis_report=analyzer.generate_analysis_summary(analysis_result),
+                    suggestions=analysis_result.get('suggestions', []),
+                    dialogue_ratio=analysis_result.get('dialogue_ratio', 0),
+                    description_ratio=analysis_result.get('description_ratio', 0)
+                )
+                db_session.add(plot_analysis)
+            
+            await db_session.commit()
+            
+            task.progress = 80
+            await db_session.commit()
        
        # 5. 提取记忆并保存到向量数据库（传入章节内容用于计算位置）
        memories = analyzer.extract_memories_from_analysis(
@@ -481,16 +492,18 @@ async def analyze_chapter_background(
            chapter_content=chapter.content or ""
        )
        
-        # 先删除该章节的旧记忆（支持重新分析）
-        old_memories_result = await db_session.execute(
-            select(StoryMemory).where(StoryMemory.chapter_id == chapter_id)
-        )
-        old_memories = old_memories_result.scalars().all()
-        for old_mem in old_memories:
-            await db_session.delete(old_mem)
-        logger.info(f"  删除旧记忆: {len(old_memories)}条")
+        # 先删除该章节的旧记忆（写操作，需要锁）
+        async with write_lock:
+            old_memories_result = await db_session.execute(
+                select(StoryMemory).where(StoryMemory.chapter_id == chapter_id)
+            )
+            old_memories = old_memories_result.scalars().all()
+            for old_mem in old_memories:
+                await db_session.delete(old_mem)
+            await db_session.commit()
+            logger.info(f"  删除旧记忆: {len(old_memories)}条")
        
-        # 准备批量添加的记忆数据
+        # 准备批量添加的记忆数据（不需要锁）
        memory_records = []
        for mem in memories:
            memory_id = f"{chapter_id}_{mem['type']}_{len(memory_records)}"
@@ -501,34 +514,35 @@ async def analyze_chapter_background(
                'metadata': mem['metadata']
            })
            
-            # 从metadata中提取位置信息
-            text_position = mem['metadata'].get('text_position', -1)
-            text_length = mem['metadata'].get('text_length', 0)
+        # 保存到关系数据库（写操作，需要锁）
+        async with write_lock:
+            for mem in memories:
+                memory_id = memory_records[memories.index(mem)]['id']
+                text_position = mem['metadata'].get('text_position', -1)
+                text_length = mem['metadata'].get('text_length', 0)
+                
+                story_memory = StoryMemory(
+                    id=memory_id,
+                    project_id=project_id,
+                    chapter_id=chapter_id,
+                    memory_type=mem['type'],
+                    content=mem['content'],
+                    title=mem['title'],
+                    importance_score=mem['metadata'].get('importance_score', 0.5),
+                    tags=mem['metadata'].get('tags', []),
+                    is_foreshadow=mem['metadata'].get('is_foreshadow', 0),
+                    story_timeline=chapter.chapter_number,
+                    chapter_position=text_position,
+                    text_length=text_length,
+                    related_characters=mem['metadata'].get('related_characters', []),
+                    related_locations=mem['metadata'].get('related_locations', [])
+                )
+                db_session.add(story_memory)
+                
+                if text_position >= 0:
+                    logger.debug(f"  保存记忆 {memory_id}: position={text_position}, length={text_length}")
            
-            # 同时保存到关系数据库
-            story_memory = StoryMemory(
-                id=memory_id,
-                project_id=project_id,
-                chapter_id=chapter_id,
-                memory_type=mem['type'],
-                content=mem['content'],
-                title=mem['title'],
-                importance_score=mem['metadata'].get('importance_score', 0.5),
-                tags=mem['metadata'].get('tags', []),
-                is_foreshadow=mem['metadata'].get('is_foreshadow', 0),
-                story_timeline=chapter.chapter_number,  # 使用章节序号作为时间线
-                chapter_position=text_position,  # 保存文本位置
-                text_length=text_length,  # 保存文本长度
-                related_characters=mem['metadata'].get('related_characters', []),
-                related_locations=mem['metadata'].get('related_locations', [])
-            )
-            db_session.add(story_memory)
-            
-            # 记录日志便于调试
-            if text_position >= 0:
-                logger.debug(f"  保存记忆 {memory_id}: position={text_position}, length={text_length}")
-        
-        await db_session.commit()
+            await db_session.commit()
        
        # 批量添加到向量数据库
        if memory_records:
@@ -539,32 +553,34 @@ async def analyze_chapter_background(
            )
            logger.info(f"✅ 添加{added_count}条记忆到向量库")
        
-        task.progress = 100
-        task.status = 'completed'
-        task.completed_at = datetime.now()
-        await db_session.commit()
+        # 最终更新任务状态（写操作，需要锁）
+        async with write_lock:
+            task.progress = 100
+            task.status = 'completed'
+            task.completed_at = datetime.now()
+            await db_session.commit()
        
        logger.info(f"✅ 章节分析完成: {chapter_id}, 提取{len(memories)}条记忆")
        
    except Exception as e:
        logger.error(f"❌ 后台分析异常: {str(e)}", exc_info=True)
-        # 确保任务状态被更新为failed，避免前端一直轮询
+        # 确保任务状态被更新为failed（写操作，需要锁）
        if db_session:
            try:
-                # 重新获取任务以确保有最新状态
-                task_result = await db_session.execute(
-                    select(AnalysisTask).where(AnalysisTask.id == task_id)
-                )
-                task = task_result.scalar_one_or_none()
-                if task:
-                    task.status = 'failed'
-                    task.error_message = str(e)[:500]
-                    task.completed_at = datetime.now()
-                    task.progress = 0  # 重置进度
-                    await db_session.commit()
-                    logger.info(f"✅ 任务状态已更新为failed: {task_id}")
-                else:
-                    logger.error(f"❌ 无法找到任务进行状态更新: {task_id}")
+                async with write_lock:
+                    task_result = await db_session.execute(
+                        select(AnalysisTask).where(AnalysisTask.id == task_id)
+                    )
+                    task = task_result.scalar_one_or_none()
+                    if task:
+                        task.status = 'failed'
+                        task.error_message = str(e)[:500]
+                        task.completed_at = datetime.now()
+                        task.progress = 0
+                        await db_session.commit()
+                        logger.info(f"✅ 任务状态已更新为failed: {task_id}")
+                    else:
+                        logger.error(f"❌ 无法找到任务进行状态更新: {task_id}")
            except Exception as update_error:
                logger.error(f"❌ 更新任务状态失败: {str(update_error)}")
    finally:
@@ -835,7 +851,7 @@ async def generate_chapter_content_stream(
                
                logger.info(f"成功创作章节 {chapter_id}，共 {new_word_count} 字")
                
-                # 创建分析任务并启动后台分析
+                # 创建分析任务
                analysis_task = AnalysisTask(
                    chapter_id=chapter_id,
                    user_id=current_user_id,
@@ -845,11 +861,15 @@ async def generate_chapter_content_stream(
                )
                db_session.add(analysis_task)
                await db_session.commit()
-                # 不需要refresh，只需要获取ID
+                await db_session.refresh(analysis_task)
                
                task_id = analysis_task.id
+                logger.info(f"📋 已创建分析任务: {task_id}")
                
-                # 启动后台分析任务
+                # 短暂延迟确保SQLite WAL完成写入
+                await asyncio.sleep(0.05)
+                
+                # 直接启动后台分析（并发执行）
                background_tasks.add_task(
                    analyze_chapter_background,
                    chapter_id=chapter_id,
@@ -859,8 +879,6 @@ async def generate_chapter_content_stream(
                    ai_service=user_ai_service
                )
                
-                logger.info(f"📋 已创建分析任务: {task_id}")
-                
                # 发送完成事件（包含分析任务ID）
                completion_data = {
                    'type': 'done',
@@ -870,13 +888,13 @@ async def generate_chapter_content_stream(
                }
                yield f"data: {json.dumps(completion_data, ensure_ascii=False)}\n\n"
                
-                # 发送分析排队事件
-                analysis_queued_data = {
-                    'type': 'analysis_queued',
+                # 发送分析开始事件
+                analysis_started_data = {
+                    'type': 'analysis_started',
                    'task_id': task_id,
-                    'message': '章节分析已加入队列'
+                    'message': '章节分析已开始'
                }
-                yield f"data: {json.dumps(analysis_queued_data, ensure_ascii=False)}\n\n"
+                yield f"data: {json.dumps(analysis_started_data, ensure_ascii=False)}\n\n"
                
                break  # 退出async for db_session循环
        
@@ -1211,11 +1229,17 @@ async def trigger_chapter_analysis(
    )
    db.add(analysis_task)
    await db.commit()
-    # 注意：不需要refresh，因为我们只需要id，而id在commit后已经生成
    
    task_id = analysis_task.id
+    logger.info(f"📋 创建分析任务: {task_id}, 章节: {chapter_id}")
    
-    # 启动后台分析任务
+    # 刷新数据库会话，确保其他会话可以看到新任务
+    await db.refresh(analysis_task)
+    
+    # 短暂延迟确保SQLite WAL完成写入（让其他会话可见）
+    await asyncio.sleep(3)
+    
+    # 直接启动后台分析（并发执行）
    background_tasks.add_task(
        analyze_chapter_background,
        chapter_id=chapter_id,
@@ -1225,11 +1249,9 @@ async def trigger_chapter_analysis(
        ai_service=user_ai_service
    )
    
-    logger.info(f"📋 手动触发分析任务: {task_id}")
-    
    return {
        "task_id": task_id,
        "chapter_id": chapter_id,
        "status": "pending",
-        "message": "分析任务已创建并加入队列"
+        "message": "分析任务已创建并开始执行"
    }