From 28c2864673f4609276d75847a840c84e1ea54fc2 Mon Sep 17 00:00:00 2001 From: xiamuceer Date: Thu, 6 Nov 2025 09:44:12 +0800 Subject: [PATCH] =?UTF-8?q?update:1.=E4=BC=98=E5=8C=96=E5=A4=A7=E7=BA=B2?= =?UTF-8?q?=E7=BB=AD=E5=86=99=E5=92=8C=E7=AB=A0=E8=8A=82=E5=86=85=E5=AE=B9?= =?UTF-8?q?=E7=94=9F=E6=88=90=E4=B8=8A=E4=B8=8B=E6=96=87=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=E6=96=B9=E5=BC=8F=20=E5=AE=9E=E7=8E=B0=E6=99=BA=E8=83=BD?= =?UTF-8?q?=E6=9E=84=E5=BB=BA=E6=8F=90=E7=A4=BA=E8=AF=8D=EF=BC=88=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E8=B6=85=E9=95=BF=E7=AB=A0=E8=8A=82=E5=86=85=E5=AE=B9?= =?UTF-8?q?=EF=BC=89=202.=E5=AE=9E=E7=8E=B0=E7=AB=A0=E8=8A=82=E6=A6=82?= =?UTF-8?q?=E8=A6=81=E6=8F=90=E5=8F=96=EF=BC=8C=E5=B9=B6=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=E5=88=B0=E5=90=91=E9=87=8F=E6=95=B0=E6=8D=AE=E5=BA=93=EF=BC=8C?= =?UTF-8?q?=E4=B8=BA=E5=90=8E=E7=BB=AD=E5=A4=A7=E7=BA=B2=E7=94=9F=E6=88=90?= =?UTF-8?q?=E5=92=8C=E5=86=85=E5=AE=B9=E6=8F=90=E4=BE=9B=E9=AA=A8=E6=9E=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/api/chapters.py | 242 +++++++++++++++++++++++--- backend/app/api/outlines.py | 154 +++++++++++++--- backend/app/services/plot_analyzer.py | 36 +++- 3 files changed, 386 insertions(+), 46 deletions(-) diff --git a/backend/app/api/chapters.py b/backend/app/api/chapters.py index 800676b..1d3d0b3 100644 --- a/backend/app/api/chapters.py +++ b/backend/app/api/chapters.py @@ -282,6 +282,198 @@ async def check_prerequisites(db: AsyncSession, chapter: Chapter) -> tuple[bool, return True, "", previous_chapters +async def build_smart_chapter_context( + db: AsyncSession, + project_id: str, + current_chapter_number: int, + user_id: str +) -> dict: + """ + 智能构建章节生成上下文(支持海量章节场景) + + 策略: + 1. 故事骨架:每50章采样1章(标题+摘要) + 2. 相关历史:通过chapter_summary记忆语义检索15个最相关章节 + 3. 近期概要:最近30章的简要摘要(200字/章) + 4. 最近完整:最近3章的完整内容 + + Args: + db: 数据库会话 + project_id: 项目ID + current_chapter_number: 当前章节序号 + user_id: 用户ID + + Returns: + 包含各部分上下文的字典 + """ + context_parts = { + 'story_skeleton': '', # 故事骨架 + 'relevant_history': '', # 相关历史章节 + 'recent_summary': '', # 近期概要 + 'recent_full': '', # 最近完整内容 + 'stats': {} # 统计信息 + } + + try: + # 1. 获取所有已完成的前置章节(只取ID和序号) + all_chapters_result = await db.execute( + select(Chapter.id, Chapter.chapter_number, Chapter.title) + .where(Chapter.project_id == project_id) + .where(Chapter.chapter_number < current_chapter_number) + .where(Chapter.content != None) + .where(Chapter.content != "") + .order_by(Chapter.chapter_number) + ) + all_chapters_info = all_chapters_result.all() + total_previous = len(all_chapters_info) + + if total_previous == 0: + logger.info("📚 这是第一章,无需构建前置上下文") + return context_parts + + logger.info(f"📚 开始构建智能上下文:共{total_previous}章前置内容") + + # 2. 构建故事骨架(每50章采样) + skeleton_chapters = [] + if total_previous > 50: + sample_interval = 50 + skeleton_indices = list(range(0, total_previous, sample_interval)) + + for idx in skeleton_indices: + chapter_info = all_chapters_info[idx] + # 获取章节摘要(优先从chapter_summary记忆获取) + summary_result = await db.execute( + select(StoryMemory.content) + .where(StoryMemory.project_id == project_id) + .where(StoryMemory.chapter_id == chapter_info.id) + .where(StoryMemory.memory_type == 'chapter_summary') + .limit(1) + ) + summary_row = summary_result.scalar_one_or_none() + summary = summary_row if summary_row else "(无摘要)" + + skeleton_chapters.append({ + 'number': chapter_info.chapter_number, + 'title': chapter_info.title, + 'summary': summary + }) + + context_parts['story_skeleton'] = "【故事骨架】\n" + "\n".join([ + f"第{ch['number']}章《{ch['title']}》:{ch['summary']}" + for ch in skeleton_chapters + ]) + logger.info(f" ✅ 故事骨架:采样{len(skeleton_chapters)}章(每50章1个)") + + # 3. 语义检索相关历史章节(使用chapter_summary记忆) + # 获取当前章节的大纲作为查询 + current_outline_result = await db.execute( + select(Outline.content) + .where(Outline.project_id == project_id) + .where(Outline.order_index == current_chapter_number) + ) + current_outline = current_outline_result.scalar_one_or_none() + + if current_outline and total_previous > 3: + # 使用记忆服务进行语义检索 + relevant_memories = await memory_service.search_memories( + user_id=user_id, + project_id=project_id, + query=current_outline, + memory_types=['chapter_summary'], + limit=15, # 检索15个最相关的章节 + min_importance=0.0 # 不过滤重要性,依赖语义相关度 + ) + + if relevant_memories: + relevant_chapters_text = [] + for mem in relevant_memories: + # 获取章节信息 + chapter_result = await db.execute( + select(Chapter.chapter_number, Chapter.title) + .where(Chapter.id == mem['metadata'].get('chapter_id')) + ) + chapter_info = chapter_result.first() + if chapter_info: + relevant_chapters_text.append( + f"第{chapter_info.chapter_number}章《{chapter_info.title}》:{mem['content']} " + f"(相关度:{mem['similarity']:.2f})" + ) + + context_parts['relevant_history'] = "【相关历史章节】\n" + "\n".join(relevant_chapters_text) + logger.info(f" ✅ 相关历史:语义检索到{len(relevant_chapters_text)}章") + + # 4. 近期概要(最近30章,每章200字摘要) + recent_summary_count = min(30, total_previous) + recent_for_summary = all_chapters_info[-recent_summary_count:] if total_previous > 3 else [] + + if recent_for_summary and len(recent_for_summary) > 3: # 至少要有3章才做摘要 + recent_summaries = [] + for chapter_info in recent_for_summary[:-3]: # 排除最后3章(它们会完整展示) + # 优先获取chapter_summary记忆 + summary_result = await db.execute( + select(StoryMemory.content) + .where(StoryMemory.project_id == project_id) + .where(StoryMemory.chapter_id == chapter_info.id) + .where(StoryMemory.memory_type == 'chapter_summary') + .limit(1) + ) + summary = summary_result.scalar_one_or_none() + + if summary: + recent_summaries.append( + f"第{chapter_info.chapter_number}章《{chapter_info.title}》:{summary}" + ) + + if recent_summaries: + context_parts['recent_summary'] = "【近期章节概要】\n" + "\n".join(recent_summaries) + logger.info(f" ✅ 近期概要:{len(recent_summaries)}章摘要") + + # 5. 最近完整内容(最近3章) + recent_full_count = min(3, total_previous) + recent_full_chapters = all_chapters_info[-recent_full_count:] + + # 获取完整内容 + recent_full_texts = [] + for chapter_info in recent_full_chapters: + chapter_result = await db.execute( + select(Chapter.content) + .where(Chapter.id == chapter_info.id) + ) + content = chapter_result.scalar_one_or_none() + if content: + recent_full_texts.append( + f"=== 第{chapter_info.chapter_number}章:{chapter_info.title} ===\n{content}" + ) + + context_parts['recent_full'] = "【最近章节完整内容】\n" + "\n\n".join(recent_full_texts) + logger.info(f" ✅ 最近完整:{len(recent_full_texts)}章全文") + + # 6. 统计信息 + context_parts['stats'] = { + 'total_previous': total_previous, + 'skeleton_samples': len(skeleton_chapters), + 'relevant_history': len(relevant_memories) if current_outline and total_previous > 3 else 0, + 'recent_summaries': len(recent_summaries) if recent_for_summary and len(recent_for_summary) > 3 else 0, + 'recent_full': len(recent_full_texts) + } + + # 计算总长度 + total_length = sum([ + len(context_parts['story_skeleton']), + len(context_parts['relevant_history']), + len(context_parts['recent_summary']), + len(context_parts['recent_full']) + ]) + context_parts['stats']['total_length'] = total_length + + logger.info(f"📊 智能上下文构建完成:总长度 {total_length} 字符") + + except Exception as e: + logger.error(f"❌ 构建智能上下文失败: {str(e)}", exc_info=True) + + return context_parts + + @router.get("/{chapter_id}/can-generate", summary="检查章节是否可以生成") async def check_can_generate( chapter_id: str, @@ -489,7 +681,8 @@ async def analyze_chapter_background( analysis=analysis_result, chapter_id=chapter_id, chapter_number=chapter.chapter_number, - chapter_content=chapter.content or "" + chapter_content=chapter.content or "", + chapter_title=chapter.title or "" ) # 先删除该章节的旧记忆(写操作,需要锁) @@ -742,27 +935,34 @@ async def generate_chapter_content_stream( else: logger.info("未指定写作风格,使用原始提示词") - # 构建前置章节内容上下文(使用之前保存的数据) + # 🚀 使用智能上下文构建(支持海量章节) + smart_context = await build_smart_chapter_context( + db=db_session, + project_id=project.id, + current_chapter_number=current_chapter.chapter_number, + user_id=current_user_id + ) + + # 组装上下文 previous_content = "" - if previous_chapters_data: - recent_chapters = previous_chapters_data[-3:] if len(previous_chapters_data) > 3 else previous_chapters_data - early_chapters = previous_chapters_data[:-3] if len(previous_chapters_data) > 3 else [] - - if early_chapters: - early_summary = "【前期剧情概要】\n" + "\n".join([ - f"第{ch['chapter_number']}章《{ch['title']}》:{ch['content'][:200] if ch['content'] else ''}..." - for ch in early_chapters - ]) - previous_content += early_summary + "\n\n" - - if recent_chapters: - recent_content = "【最近章节完整内容】\n" + "\n\n".join([ - f"=== 第{ch['chapter_number']}章:{ch['title']} ===\n{ch['content']}" - for ch in recent_chapters - ]) - previous_content += recent_content - - logger.info(f"构建前置上下文:{len(early_chapters)}章摘要 + {len(recent_chapters)}章完整内容") + if smart_context['story_skeleton']: + previous_content += smart_context['story_skeleton'] + "\n\n" + if smart_context['relevant_history']: + previous_content += smart_context['relevant_history'] + "\n\n" + if smart_context['recent_summary']: + previous_content += smart_context['recent_summary'] + "\n\n" + if smart_context['recent_full']: + previous_content += smart_context['recent_full'] + + # 日志输出统计信息 + stats = smart_context['stats'] + logger.info(f"📊 智能上下文统计:") + logger.info(f" - 前置章节总数: {stats.get('total_previous', 0)}") + logger.info(f" - 故事骨架采样: {stats.get('skeleton_samples', 0)}章") + logger.info(f" - 相关历史检索: {stats.get('relevant_history', 0)}章") + logger.info(f" - 近期章节概要: {stats.get('recent_summaries', 0)}章") + logger.info(f" - 最近完整内容: {stats.get('recent_full', 0)}章") + logger.info(f" - 上下文总长度: {stats.get('total_length', 0)}字符") # 🧠 构建记忆增强上下文 logger.info(f"🧠 开始构建记忆增强上下文...") diff --git a/backend/app/api/outlines.py b/backend/app/api/outlines.py index 1d86ba5..2ff646b 100644 --- a/backend/app/api/outlines.py +++ b/backend/app/api/outlines.py @@ -477,6 +477,92 @@ async def _generate_new_outline( return OutlineListResponse(total=len(outlines), items=outlines) +async def _build_smart_outline_context( + latest_outlines: List[Outline], + user_id: str, + project_id: str +) -> dict: + """ + 智能构建大纲续写上下文(支持海量大纲场景) + + 策略: + 1. 故事骨架:每50章采样1章(仅标题) + 2. 近期概要:最近20章(标题+简要) + 3. 最近详细:最近2章(完整内容) + + Args: + latest_outlines: 所有已有大纲列表 + user_id: 用户ID + project_id: 项目ID + + Returns: + 包含压缩后上下文的字典 + """ + total_count = len(latest_outlines) + + context = { + 'story_skeleton': '', # 故事骨架(标题列表) + 'recent_summary': '', # 近期概要(标题+内容前50字) + 'recent_detail': '', # 最近详细(完整内容) + 'stats': { + 'total': total_count, + 'skeleton_samples': 0, + 'recent_summaries': 0, + 'recent_details': 0 + } + } + + try: + # 1. 故事骨架(每50章采样,仅标题) + if total_count > 50: + sample_interval = 50 + skeleton_indices = list(range(0, total_count, sample_interval)) + skeleton_titles = [ + f"第{latest_outlines[idx].order_index}章: {latest_outlines[idx].title}" + for idx in skeleton_indices + ] + context['story_skeleton'] = "【故事骨架】\n" + "\n".join(skeleton_titles) + context['stats']['skeleton_samples'] = len(skeleton_titles) + logger.info(f" ✅ 故事骨架:采样{len(skeleton_titles)}章标题") + + # 2. 近期概要(最近20章,标题+内容前50字) + recent_summary_count = min(20, total_count) + if recent_summary_count > 2: # 排除最后2章(它们会完整展示) + recent_for_summary = latest_outlines[-recent_summary_count:-2] + recent_summaries = [ + f"第{o.order_index}章《{o.title}》: {o.content[:50]}..." + for o in recent_for_summary + ] + context['recent_summary'] = "【近期大纲概要】\n" + "\n".join(recent_summaries) + context['stats']['recent_summaries'] = len(recent_summaries) + logger.info(f" ✅ 近期概要:{len(recent_summaries)}章") + + # 3. 最近详细(最近2章,完整内容) + recent_detail_count = min(2, total_count) + recent_details = latest_outlines[-recent_detail_count:] + detail_texts = [ + f"第{o.order_index}章《{o.title}》: {o.content}" + for o in recent_details + ] + context['recent_detail'] = "【最近大纲详情】\n" + "\n".join(detail_texts) + context['stats']['recent_details'] = len(detail_texts) + logger.info(f" ✅ 最近详细:{len(detail_texts)}章") + + # 计算总长度 + total_length = sum([ + len(context['story_skeleton']), + len(context['recent_summary']), + len(context['recent_detail']) + ]) + context['stats']['total_length'] = total_length + logger.info(f"📊 大纲上下文总长度: {total_length} 字符") + + except Exception as e: + logger.error(f"❌ 构建智能大纲上下文失败: {str(e)}", exc_info=True) + + return context + + async def _continue_outline( request: OutlineGenerateRequest, project: Project, @@ -537,25 +623,35 @@ async def _continue_outline( ) latest_outlines = latest_result.scalars().all() - # 获取最近2章的剧情 - recent_outlines = latest_outlines[-2:] if len(latest_outlines) >= 2 else latest_outlines - recent_plot = "\n".join([ - f"第{o.order_index}章《{o.title}》: {o.content}" - for o in recent_outlines - ]) + # 🚀 使用智能上下文构建(支持海量大纲) + smart_context = await _build_smart_outline_context( + latest_outlines=latest_outlines, + user_id=user_id, + project_id=project.id + ) - # 全部章节概览 - all_chapters_brief = "\n".join([ - f"第{o.order_index}章: {o.title}" - for o in latest_outlines - ]) + # 组装上下文字符串 + all_chapters_brief = "" + if smart_context['story_skeleton']: + all_chapters_brief += smart_context['story_skeleton'] + "\n\n" + if smart_context['recent_summary']: + all_chapters_brief += smart_context['recent_summary'] + "\n\n" + + # 最近详细内容作为 recent_plot + recent_plot = smart_context['recent_detail'] + + # 日志统计 + stats = smart_context['stats'] + logger.info(f"📊 大纲上下文统计: 总数{stats['total']}, 骨架{stats['skeleton_samples']}, " + f"概要{stats['recent_summaries']}, 详细{stats['recent_details']}, " + f"长度{stats['total_length']}字符") # 🧠 构建记忆增强上下文(仅续写模式需要) memory_context = None try: logger.info(f"🧠 为第{batch_num + 1}批构建记忆上下文...") # 使用最近一章的大纲作为查询 - query_outline = recent_outlines[-1].content if recent_outlines else "" + query_outline = latest_outlines[-1].content if latest_outlines else "" memory_context = await memory_service.build_context_for_generation( user_id=user_id, project_id=project.id, @@ -952,18 +1048,28 @@ async def continue_outline_generator( ) latest_outlines = latest_result.scalars().all() - # 获取最近2章的剧情 - recent_outlines = latest_outlines[-2:] if len(latest_outlines) >= 2 else latest_outlines - recent_plot = "\n".join([ - f"第{o.order_index}章《{o.title}》: {o.content}" - for o in recent_outlines - ]) + # 🚀 使用智能上下文构建(支持海量大纲) + smart_context = await _build_smart_outline_context( + latest_outlines=latest_outlines, + user_id=user_id, + project_id=project_id + ) - # 全部章节概览 - all_chapters_brief = "\n".join([ - f"第{o.order_index}章: {o.title}" - for o in latest_outlines - ]) + # 组装上下文字符串 + all_chapters_brief = "" + if smart_context['story_skeleton']: + all_chapters_brief += smart_context['story_skeleton'] + "\n\n" + if smart_context['recent_summary']: + all_chapters_brief += smart_context['recent_summary'] + "\n\n" + + # 最近详细内容作为 recent_plot + recent_plot = smart_context['recent_detail'] + + # 日志统计 + stats = smart_context['stats'] + logger.info(f"📊 批次{batch_num + 1}大纲上下文: 总数{stats['total']}, " + f"骨架{stats['skeleton_samples']}, 概要{stats['recent_summaries']}, " + f"详细{stats['recent_details']}, 长度{stats['total_length']}字符") # 🧠 构建记忆增强上下文 memory_context = None @@ -972,7 +1078,7 @@ async def continue_outline_generator( f"🧠 构建记忆上下文...", batch_progress + 3 ) - query_outline = recent_outlines[-1].content if recent_outlines else "" + query_outline = latest_outlines[-1].content if latest_outlines else "" memory_context = await memory_service.build_context_for_generation( user_id=user_id, project_id=project_id, diff --git a/backend/app/services/plot_analyzer.py b/backend/app/services/plot_analyzer.py index 1f8e809..29e9636 100644 --- a/backend/app/services/plot_analyzer.py +++ b/backend/app/services/plot_analyzer.py @@ -298,7 +298,8 @@ class PlotAnalyzer: analysis: Dict[str, Any], chapter_id: str, chapter_number: int, - chapter_content: str = "" + chapter_content: str = "", + chapter_title: str = "" ) -> List[Dict[str, Any]]: """ 从分析结果中提取记忆片段 @@ -308,6 +309,7 @@ class PlotAnalyzer: chapter_id: 章节ID chapter_number: 章节号 chapter_content: 章节完整内容(用于计算位置) + chapter_title: 章节标题 Returns: 记忆片段列表 @@ -315,6 +317,38 @@ class PlotAnalyzer: memories = [] try: + # 【新增】0. 提取章节摘要作为记忆(用于语义检索相关章节) + chapter_summary = "" + + # 尝试从分析结果获取摘要 + if analysis.get('summary'): + chapter_summary = analysis.get('summary') + # 或者从情节点组合生成摘要 + elif analysis.get('plot_points'): + plot_summaries = [p.get('content', '') for p in analysis.get('plot_points', [])[:3]] + chapter_summary = ";".join(plot_summaries) + # 或者使用内容前300字 + elif chapter_content: + chapter_summary = chapter_content[:300] + ("..." if len(chapter_content) > 300 else "") + + # 如果有摘要,添加到记忆中 + if chapter_summary: + memories.append({ + 'type': 'chapter_summary', + 'content': chapter_summary, + 'title': f"第{chapter_number}章《{chapter_title}》摘要", + 'metadata': { + 'chapter_id': chapter_id, + 'chapter_number': chapter_number, + 'importance_score': 0.6, # 中等重要性 + 'tags': ['摘要', '章节概览', chapter_title], + 'is_foreshadow': 0, + 'text_position': 0, + 'text_length': len(chapter_summary) + } + }) + logger.info(f" ✅ 添加章节摘要记忆: {len(chapter_summary)}字") + # 1. 提取钩子作为记忆 for i, hook in enumerate(analysis.get('hooks', [])): if hook.get('strength', 0) >= 6: # 只保存强度>=6的钩子