fix:优化章节分析并发问题

This commit is contained in:
xiamuceer
2025-11-05 00:11:27 +08:00
parent e62286eab1
commit 7e9781477b
5 changed files with 478 additions and 340 deletions
+193 -171
View File
@@ -7,6 +7,7 @@ import json
import asyncio
from typing import Optional
from datetime import datetime
from asyncio import Queue, Lock
from app.database import get_db
from app.models.chapter import Chapter
@@ -34,6 +35,17 @@ from app.api.settings import get_user_ai_service
router = APIRouter(prefix="/chapters", tags=["章节管理"])
logger = get_logger(__name__)
# 全局数据库写入锁(每个用户一个锁,用于保护SQLite写入操作)
db_write_locks: dict[str, Lock] = {}
async def get_db_write_lock(user_id: str) -> Lock:
"""获取或创建用户的数据库写入锁"""
if user_id not in db_write_locks:
db_write_locks[user_id] = Lock()
logger.debug(f"🔒 为用户 {user_id} 创建数据库写入锁")
return db_write_locks[user_id]
@router.post("", response_model=ChapterResponse, summary="创建章节")
async def create_chapter(
@@ -318,7 +330,7 @@ async def analyze_chapter_background(
ai_service: AIService
):
"""
后台异步分析章节
后台异步分析章节(支持并发,使用锁保护数据库写入)
Args:
chapter_id: 章节ID
@@ -328,11 +340,10 @@ async def analyze_chapter_background(
ai_service: AI服务实例
"""
db_session = None
write_lock = await get_db_write_lock(user_id)
try:
logger.info(f"🔍 开始后台分析章节: {chapter_id}")
# 等待一小段时间,确保主会话的commit已经持久化到磁盘
await asyncio.sleep(0.1)
logger.info(f"🔍 开始分析章节: {chapter_id}, 任务ID: {task_id}")
# 创建独立数据库会话
from app.database import get_engine
@@ -346,43 +357,40 @@ async def analyze_chapter_background(
)
db_session = AsyncSessionLocal()
# 1. 获取任务(添加重试逻辑
task = None
for retry in range(3):
task_result = await db_session.execute(
select(AnalysisTask).where(AnalysisTask.id == task_id)
)
task = task_result.scalar_one_or_none()
if task:
break
if retry < 2:
logger.warning(f"⚠️ 第{retry+1}次未找到任务 {task_id},等待后重试...")
await asyncio.sleep(0.2)
# 1. 获取任务(读操作
task_result = await db_session.execute(
select(AnalysisTask).where(AnalysisTask.id == task_id)
)
task = task_result.scalar_one_or_none()
if not task:
logger.error(f"❌ 任务不存在: {task_id}")
return
task.status = 'running'
task.started_at = datetime.now()
task.progress = 10
await db_session.commit()
# 更新任务状态(写操作,需要锁)
async with write_lock:
task.status = 'running'
task.started_at = datetime.now()
task.progress = 10
await db_session.commit()
# 2. 获取章节信息
# 2. 获取章节信息(读操作)
chapter_result = await db_session.execute(
select(Chapter).where(Chapter.id == chapter_id)
)
chapter = chapter_result.scalar_one_or_none()
if not chapter or not chapter.content:
task.status = 'failed'
task.error_message = '章节不存在或内容为空'
task.completed_at = datetime.now()
await db_session.commit()
async with write_lock:
task.status = 'failed'
task.error_message = '章节不存在或内容为空'
task.completed_at = datetime.now()
await db_session.commit()
logger.error(f"❌ 章节不存在或内容为空: {chapter_id}")
return
task.progress = 20
await db_session.commit()
async with write_lock:
task.progress = 20
await db_session.commit()
# 3. 使用PlotAnalyzer分析章节
analyzer = PlotAnalyzer(ai_service)
@@ -394,84 +402,87 @@ async def analyze_chapter_background(
)
if not analysis_result:
task.status = 'failed'
task.error_message = 'AI分析失败,请检查日志'
task.completed_at = datetime.now()
await db_session.commit()
async with write_lock:
task.status = 'failed'
task.error_message = 'AI分析失败,请检查日志'
task.completed_at = datetime.now()
await db_session.commit()
logger.error(f"❌ AI分析失败: {chapter_id}")
return
task.progress = 60
await db_session.commit()
async with write_lock:
task.progress = 60
await db_session.commit()
# 4. 保存分析结果到数据库(先检查是否已存在
existing_analysis_result = await db_session.execute(
select(PlotAnalysis).where(PlotAnalysis.chapter_id == chapter_id)
)
existing_analysis = existing_analysis_result.scalar_one_or_none()
if existing_analysis:
# 更新现有记录
logger.info(f" 更新现有分析记录: {existing_analysis.id}")
existing_analysis.plot_stage = analysis_result.get('plot_stage', '发展')
existing_analysis.conflict_level = analysis_result.get('conflict', {}).get('level', 0)
existing_analysis.conflict_types = analysis_result.get('conflict', {}).get('types', [])
existing_analysis.emotional_tone = analysis_result.get('emotional_arc', {}).get('primary_emotion', '')
existing_analysis.emotional_intensity = analysis_result.get('emotional_arc', {}).get('intensity', 0) / 10.0
existing_analysis.hooks = analysis_result.get('hooks', [])
existing_analysis.hooks_count = len(analysis_result.get('hooks', []))
existing_analysis.foreshadows = analysis_result.get('foreshadows', [])
existing_analysis.foreshadows_planted = sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'planted')
existing_analysis.foreshadows_resolved = sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'resolved')
existing_analysis.plot_points = analysis_result.get('plot_points', [])
existing_analysis.plot_points_count = len(analysis_result.get('plot_points', []))
existing_analysis.character_states = analysis_result.get('character_states', [])
existing_analysis.scenes = analysis_result.get('scenes', [])
existing_analysis.pacing = analysis_result.get('pacing', 'moderate')
existing_analysis.overall_quality_score = analysis_result.get('scores', {}).get('overall', 0)
existing_analysis.pacing_score = analysis_result.get('scores', {}).get('pacing', 0)
existing_analysis.engagement_score = analysis_result.get('scores', {}).get('engagement', 0)
existing_analysis.coherence_score = analysis_result.get('scores', {}).get('coherence', 0)
existing_analysis.analysis_report = analyzer.generate_analysis_summary(analysis_result)
existing_analysis.suggestions = analysis_result.get('suggestions', [])
existing_analysis.dialogue_ratio = analysis_result.get('dialogue_ratio', 0)
existing_analysis.description_ratio = analysis_result.get('description_ratio', 0)
else:
# 创建新记录
logger.info(f" 创建新的分析记录")
plot_analysis = PlotAnalysis(
chapter_id=chapter_id,
project_id=project_id,
plot_stage=analysis_result.get('plot_stage', '发展'),
conflict_level=analysis_result.get('conflict', {}).get('level', 0),
conflict_types=analysis_result.get('conflict', {}).get('types', []),
emotional_tone=analysis_result.get('emotional_arc', {}).get('primary_emotion', ''),
emotional_intensity=analysis_result.get('emotional_arc', {}).get('intensity', 0) / 10.0,
hooks=analysis_result.get('hooks', []),
hooks_count=len(analysis_result.get('hooks', [])),
foreshadows=analysis_result.get('foreshadows', []),
foreshadows_planted=sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'planted'),
foreshadows_resolved=sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'resolved'),
plot_points=analysis_result.get('plot_points', []),
plot_points_count=len(analysis_result.get('plot_points', [])),
character_states=analysis_result.get('character_states', []),
scenes=analysis_result.get('scenes', []),
pacing=analysis_result.get('pacing', 'moderate'),
overall_quality_score=analysis_result.get('scores', {}).get('overall', 0),
pacing_score=analysis_result.get('scores', {}).get('pacing', 0),
engagement_score=analysis_result.get('scores', {}).get('engagement', 0),
coherence_score=analysis_result.get('scores', {}).get('coherence', 0),
analysis_report=analyzer.generate_analysis_summary(analysis_result),
suggestions=analysis_result.get('suggestions', []),
dialogue_ratio=analysis_result.get('dialogue_ratio', 0),
description_ratio=analysis_result.get('description_ratio', 0)
# 4. 保存分析结果到数据库(写操作,需要锁
async with write_lock:
existing_analysis_result = await db_session.execute(
select(PlotAnalysis).where(PlotAnalysis.chapter_id == chapter_id)
)
db_session.add(plot_analysis)
await db_session.commit()
task.progress = 80
await db_session.commit()
existing_analysis = existing_analysis_result.scalar_one_or_none()
if existing_analysis:
# 更新现有记录
logger.info(f" 更新现有分析记录: {existing_analysis.id}")
existing_analysis.plot_stage = analysis_result.get('plot_stage', '发展')
existing_analysis.conflict_level = analysis_result.get('conflict', {}).get('level', 0)
existing_analysis.conflict_types = analysis_result.get('conflict', {}).get('types', [])
existing_analysis.emotional_tone = analysis_result.get('emotional_arc', {}).get('primary_emotion', '')
existing_analysis.emotional_intensity = analysis_result.get('emotional_arc', {}).get('intensity', 0) / 10.0
existing_analysis.hooks = analysis_result.get('hooks', [])
existing_analysis.hooks_count = len(analysis_result.get('hooks', []))
existing_analysis.foreshadows = analysis_result.get('foreshadows', [])
existing_analysis.foreshadows_planted = sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'planted')
existing_analysis.foreshadows_resolved = sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'resolved')
existing_analysis.plot_points = analysis_result.get('plot_points', [])
existing_analysis.plot_points_count = len(analysis_result.get('plot_points', []))
existing_analysis.character_states = analysis_result.get('character_states', [])
existing_analysis.scenes = analysis_result.get('scenes', [])
existing_analysis.pacing = analysis_result.get('pacing', 'moderate')
existing_analysis.overall_quality_score = analysis_result.get('scores', {}).get('overall', 0)
existing_analysis.pacing_score = analysis_result.get('scores', {}).get('pacing', 0)
existing_analysis.engagement_score = analysis_result.get('scores', {}).get('engagement', 0)
existing_analysis.coherence_score = analysis_result.get('scores', {}).get('coherence', 0)
existing_analysis.analysis_report = analyzer.generate_analysis_summary(analysis_result)
existing_analysis.suggestions = analysis_result.get('suggestions', [])
existing_analysis.dialogue_ratio = analysis_result.get('dialogue_ratio', 0)
existing_analysis.description_ratio = analysis_result.get('description_ratio', 0)
else:
# 创建新记录
logger.info(f" 创建新的分析记录")
plot_analysis = PlotAnalysis(
chapter_id=chapter_id,
project_id=project_id,
plot_stage=analysis_result.get('plot_stage', '发展'),
conflict_level=analysis_result.get('conflict', {}).get('level', 0),
conflict_types=analysis_result.get('conflict', {}).get('types', []),
emotional_tone=analysis_result.get('emotional_arc', {}).get('primary_emotion', ''),
emotional_intensity=analysis_result.get('emotional_arc', {}).get('intensity', 0) / 10.0,
hooks=analysis_result.get('hooks', []),
hooks_count=len(analysis_result.get('hooks', [])),
foreshadows=analysis_result.get('foreshadows', []),
foreshadows_planted=sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'planted'),
foreshadows_resolved=sum(1 for f in analysis_result.get('foreshadows', []) if f.get('type') == 'resolved'),
plot_points=analysis_result.get('plot_points', []),
plot_points_count=len(analysis_result.get('plot_points', [])),
character_states=analysis_result.get('character_states', []),
scenes=analysis_result.get('scenes', []),
pacing=analysis_result.get('pacing', 'moderate'),
overall_quality_score=analysis_result.get('scores', {}).get('overall', 0),
pacing_score=analysis_result.get('scores', {}).get('pacing', 0),
engagement_score=analysis_result.get('scores', {}).get('engagement', 0),
coherence_score=analysis_result.get('scores', {}).get('coherence', 0),
analysis_report=analyzer.generate_analysis_summary(analysis_result),
suggestions=analysis_result.get('suggestions', []),
dialogue_ratio=analysis_result.get('dialogue_ratio', 0),
description_ratio=analysis_result.get('description_ratio', 0)
)
db_session.add(plot_analysis)
await db_session.commit()
task.progress = 80
await db_session.commit()
# 5. 提取记忆并保存到向量数据库(传入章节内容用于计算位置)
memories = analyzer.extract_memories_from_analysis(
@@ -481,16 +492,18 @@ async def analyze_chapter_background(
chapter_content=chapter.content or ""
)
# 先删除该章节的旧记忆(支持重新分析
old_memories_result = await db_session.execute(
select(StoryMemory).where(StoryMemory.chapter_id == chapter_id)
)
old_memories = old_memories_result.scalars().all()
for old_mem in old_memories:
await db_session.delete(old_mem)
logger.info(f" 删除旧记忆: {len(old_memories)}")
# 先删除该章节的旧记忆(写操作,需要锁
async with write_lock:
old_memories_result = await db_session.execute(
select(StoryMemory).where(StoryMemory.chapter_id == chapter_id)
)
old_memories = old_memories_result.scalars().all()
for old_mem in old_memories:
await db_session.delete(old_mem)
await db_session.commit()
logger.info(f" 删除旧记忆: {len(old_memories)}")
# 准备批量添加的记忆数据
# 准备批量添加的记忆数据(不需要锁)
memory_records = []
for mem in memories:
memory_id = f"{chapter_id}_{mem['type']}_{len(memory_records)}"
@@ -501,34 +514,35 @@ async def analyze_chapter_background(
'metadata': mem['metadata']
})
# 从metadata中提取位置信息
text_position = mem['metadata'].get('text_position', -1)
text_length = mem['metadata'].get('text_length', 0)
# 保存到关系数据库(写操作,需要锁)
async with write_lock:
for mem in memories:
memory_id = memory_records[memories.index(mem)]['id']
text_position = mem['metadata'].get('text_position', -1)
text_length = mem['metadata'].get('text_length', 0)
story_memory = StoryMemory(
id=memory_id,
project_id=project_id,
chapter_id=chapter_id,
memory_type=mem['type'],
content=mem['content'],
title=mem['title'],
importance_score=mem['metadata'].get('importance_score', 0.5),
tags=mem['metadata'].get('tags', []),
is_foreshadow=mem['metadata'].get('is_foreshadow', 0),
story_timeline=chapter.chapter_number,
chapter_position=text_position,
text_length=text_length,
related_characters=mem['metadata'].get('related_characters', []),
related_locations=mem['metadata'].get('related_locations', [])
)
db_session.add(story_memory)
if text_position >= 0:
logger.debug(f" 保存记忆 {memory_id}: position={text_position}, length={text_length}")
# 同时保存到关系数据库
story_memory = StoryMemory(
id=memory_id,
project_id=project_id,
chapter_id=chapter_id,
memory_type=mem['type'],
content=mem['content'],
title=mem['title'],
importance_score=mem['metadata'].get('importance_score', 0.5),
tags=mem['metadata'].get('tags', []),
is_foreshadow=mem['metadata'].get('is_foreshadow', 0),
story_timeline=chapter.chapter_number, # 使用章节序号作为时间线
chapter_position=text_position, # 保存文本位置
text_length=text_length, # 保存文本长度
related_characters=mem['metadata'].get('related_characters', []),
related_locations=mem['metadata'].get('related_locations', [])
)
db_session.add(story_memory)
# 记录日志便于调试
if text_position >= 0:
logger.debug(f" 保存记忆 {memory_id}: position={text_position}, length={text_length}")
await db_session.commit()
await db_session.commit()
# 批量添加到向量数据库
if memory_records:
@@ -539,32 +553,34 @@ async def analyze_chapter_background(
)
logger.info(f"✅ 添加{added_count}条记忆到向量库")
task.progress = 100
task.status = 'completed'
task.completed_at = datetime.now()
await db_session.commit()
# 最终更新任务状态(写操作,需要锁)
async with write_lock:
task.progress = 100
task.status = 'completed'
task.completed_at = datetime.now()
await db_session.commit()
logger.info(f"✅ 章节分析完成: {chapter_id}, 提取{len(memories)}条记忆")
except Exception as e:
logger.error(f"❌ 后台分析异常: {str(e)}", exc_info=True)
# 确保任务状态被更新为failed,避免前端一直轮询
# 确保任务状态被更新为failed(写操作,需要锁)
if db_session:
try:
# 重新获取任务以确保有最新状态
task_result = await db_session.execute(
select(AnalysisTask).where(AnalysisTask.id == task_id)
)
task = task_result.scalar_one_or_none()
if task:
task.status = 'failed'
task.error_message = str(e)[:500]
task.completed_at = datetime.now()
task.progress = 0 # 重置进度
await db_session.commit()
logger.info(f"✅ 任务状态已更新为failed: {task_id}")
else:
logger.error(f"❌ 无法找到任务进行状态更新: {task_id}")
async with write_lock:
task_result = await db_session.execute(
select(AnalysisTask).where(AnalysisTask.id == task_id)
)
task = task_result.scalar_one_or_none()
if task:
task.status = 'failed'
task.error_message = str(e)[:500]
task.completed_at = datetime.now()
task.progress = 0
await db_session.commit()
logger.info(f"✅ 任务状态已更新为failed: {task_id}")
else:
logger.error(f"❌ 无法找到任务进行状态更新: {task_id}")
except Exception as update_error:
logger.error(f"❌ 更新任务状态失败: {str(update_error)}")
finally:
@@ -835,7 +851,7 @@ async def generate_chapter_content_stream(
logger.info(f"成功创作章节 {chapter_id},共 {new_word_count}")
# 创建分析任务并启动后台分析
# 创建分析任务
analysis_task = AnalysisTask(
chapter_id=chapter_id,
user_id=current_user_id,
@@ -845,11 +861,15 @@ async def generate_chapter_content_stream(
)
db_session.add(analysis_task)
await db_session.commit()
# 不需要refresh,只需要获取ID
await db_session.refresh(analysis_task)
task_id = analysis_task.id
logger.info(f"📋 已创建分析任务: {task_id}")
# 启动后台分析任务
# 短暂延迟确保SQLite WAL完成写入
await asyncio.sleep(0.05)
# 直接启动后台分析(并发执行)
background_tasks.add_task(
analyze_chapter_background,
chapter_id=chapter_id,
@@ -859,8 +879,6 @@ async def generate_chapter_content_stream(
ai_service=user_ai_service
)
logger.info(f"📋 已创建分析任务: {task_id}")
# 发送完成事件(包含分析任务ID
completion_data = {
'type': 'done',
@@ -870,13 +888,13 @@ async def generate_chapter_content_stream(
}
yield f"data: {json.dumps(completion_data, ensure_ascii=False)}\n\n"
# 发送分析排队事件
analysis_queued_data = {
'type': 'analysis_queued',
# 发送分析开始事件
analysis_started_data = {
'type': 'analysis_started',
'task_id': task_id,
'message': '章节分析已加入队列'
'message': '章节分析已开始'
}
yield f"data: {json.dumps(analysis_queued_data, ensure_ascii=False)}\n\n"
yield f"data: {json.dumps(analysis_started_data, ensure_ascii=False)}\n\n"
break # 退出async for db_session循环
@@ -1211,11 +1229,17 @@ async def trigger_chapter_analysis(
)
db.add(analysis_task)
await db.commit()
# 注意:不需要refresh,因为我们只需要id,而id在commit后已经生成
task_id = analysis_task.id
logger.info(f"📋 创建分析任务: {task_id}, 章节: {chapter_id}")
# 启动后台分析任务
# 刷新数据库会话,确保其他会话可以看到新任务
await db.refresh(analysis_task)
# 短暂延迟确保SQLite WAL完成写入(让其他会话可见)
await asyncio.sleep(3)
# 直接启动后台分析(并发执行)
background_tasks.add_task(
analyze_chapter_background,
chapter_id=chapter_id,
@@ -1225,11 +1249,9 @@ async def trigger_chapter_analysis(
ai_service=user_ai_service
)
logger.info(f"📋 手动触发分析任务: {task_id}")
return {
"task_id": task_id,
"chapter_id": chapter_id,
"status": "pending",
"message": "分析任务已创建并加入队列"
"message": "分析任务已创建并开始执行"
}