update:1.优化大纲续写和章节内容生成上下文构建方式实现智能构建提示词（支持超长章节内容） 2.实现章节概要提取，并保存到向量数据库，为后续大纲生成和内容提供骨架

2025-11-06 09:44:12 +08:00
parent 397ca30bcb
commit 28c2864673
3 changed files with 386 additions and 46 deletions
@@ -282,6 +282,198 @@ async def check_prerequisites(db: AsyncSession, chapter: Chapter) -> tuple[bool,
    return True, "", previous_chapters


+async def build_smart_chapter_context(
+    db: AsyncSession,
+    project_id: str,
+    current_chapter_number: int,
+    user_id: str
+) -> dict:
+    """
+    Build a layered generation context from the chapters that precede the current one.
+
+    The context is split into sections so that projects with a very large
+    number of chapters do not have to send every previous chapter to the model:
+
+    1. ``story_skeleton``: every 50th previous chapter (title + stored summary);
+       only built when more than 50 previous chapters exist.
+    2. ``relevant_history``: up to 15 ``chapter_summary`` memories returned by a
+       semantic search that uses the current chapter's outline as the query.
+       Only hits that belong to a previous, non-empty chapter of this project
+       are kept, so summaries of later chapters never leak into the prompt.
+    3. ``recent_summary``: stored summaries of the last 30 previous chapters,
+       excluding the last 3 (those are included in full).
+    4. ``recent_full``: full text of the last 3 previous chapters.
+
+    Args:
+        db: Database session used for all chapter / outline / memory queries.
+        project_id: ID of the project the chapter belongs to.
+        current_chapter_number: Number of the chapter about to be generated.
+        user_id: ID of the user, forwarded to the memory service.
+
+    Returns:
+        A dict with the keys ``story_skeleton``, ``relevant_history``,
+        ``recent_summary``, ``recent_full`` (strings, empty when the section
+        does not apply) and ``stats`` (counts per section plus ``total_length``).
+        ``stats`` stays empty when there is no previous chapter or when
+        building the context failed; errors are logged, never raised.
+    """
+    context_parts = {
+        'story_skeleton': '',      # sampled chapters across the whole story
+        'relevant_history': '',    # semantically related earlier chapters
+        'recent_summary': '',      # summaries of the recent chapters
+        'recent_full': '',         # full text of the latest chapters
+        'stats': {}                # per-section counters
+    }
+
+    try:
+        # 1. Load id / number / title of every finished previous chapter.
+        all_chapters_result = await db.execute(
+            select(Chapter.id, Chapter.chapter_number, Chapter.title)
+            .where(Chapter.project_id == project_id)
+            .where(Chapter.chapter_number < current_chapter_number)
+            # SQLAlchemy turns "!= None" into "IS NOT NULL"; "is not None" would not work here.
+            .where(Chapter.content != None)  # noqa: E711
+            .where(Chapter.content != "")
+            .order_by(Chapter.chapter_number)
+        )
+        all_chapters_info = all_chapters_result.all()
+        total_previous = len(all_chapters_info)
+
+        if total_previous == 0:
+            logger.info("📚 这是第一章，无需构建前置上下文")
+            return context_parts
+
+        logger.info(f"📚 开始构建智能上下文：共{total_previous}章前置内容")
+
+        # Decide up front which chapters feed which section.
+        skeleton_sources = all_chapters_info[::50] if total_previous > 50 else []
+        recent_for_summary = all_chapters_info[-30:-3]   # last 30 minus the 3 shown in full
+        recent_full_chapters = all_chapters_info[-3:]
+
+        # Fetch every needed chapter summary with one query instead of one query per chapter.
+        summary_ids = [ch.id for ch in skeleton_sources] + [ch.id for ch in recent_for_summary]
+        summaries_by_chapter = {}
+        if summary_ids:
+            summary_rows = await db.execute(
+                select(StoryMemory.chapter_id, StoryMemory.content)
+                .where(StoryMemory.project_id == project_id)
+                .where(StoryMemory.memory_type == 'chapter_summary')
+                .where(StoryMemory.chapter_id.in_(summary_ids))
+            )
+            for summary_chapter_id, summary_content in summary_rows.all():
+                # Keep the first non-empty summary per chapter.
+                if summary_content and summary_chapter_id not in summaries_by_chapter:
+                    summaries_by_chapter[summary_chapter_id] = summary_content
+
+        # 2. Story skeleton (one sample every 50 chapters).
+        skeleton_chapters = []
+        if skeleton_sources:
+            for chapter_info in skeleton_sources:
+                skeleton_chapters.append({
+                    'number': chapter_info.chapter_number,
+                    'title': chapter_info.title,
+                    'summary': summaries_by_chapter.get(chapter_info.id) or "（无摘要）"
+                })
+
+            context_parts['story_skeleton'] = "【故事骨架】\n" + "\n".join([
+                f"第{ch['number']}章《{ch['title']}》：{ch['summary']}"
+                for ch in skeleton_chapters
+            ])
+            logger.info(f"  ✅ 故事骨架：采样{len(skeleton_chapters)}章（每50章1个）")
+
+        # 3. Semantic retrieval of related earlier chapters, queried with the current outline.
+        # limit(1) keeps scalar_one_or_none() from raising when duplicate outline rows exist.
+        current_outline_result = await db.execute(
+            select(Outline.content)
+            .where(Outline.project_id == project_id)
+            .where(Outline.order_index == current_chapter_number)
+            .limit(1)
+        )
+        current_outline = current_outline_result.scalar_one_or_none()
+
+        relevant_chapters_text = []
+        if current_outline and total_previous > 3:
+            # The memory service is an optional enhancement: if it fails we still
+            # want the recent-chapter sections below, so isolate its errors here.
+            try:
+                relevant_memories = await memory_service.search_memories(
+                    user_id=user_id,
+                    project_id=project_id,
+                    query=current_outline,
+                    memory_types=['chapter_summary'],
+                    limit=15,
+                    min_importance=0.0  # rank purely by semantic similarity
+                )
+
+                # Resolving hits against the already-loaded previous chapters both
+                # avoids one query per hit and drops hits from the current or later chapters.
+                previous_by_id = {ch.id: ch for ch in all_chapters_info}
+                for mem in relevant_memories or []:
+                    metadata = mem.get('metadata') or {}
+                    chapter_info = previous_by_id.get(metadata.get('chapter_id'))
+                    if chapter_info is None:
+                        continue
+                    relevant_chapters_text.append(
+                        f"第{chapter_info.chapter_number}章《{chapter_info.title}》：{mem['content']} "
+                        f"(相关度:{mem['similarity']:.2f})"
+                    )
+            except Exception as search_error:
+                relevant_chapters_text = []
+                logger.warning(f"⚠️ 语义检索相关历史失败，已跳过: {str(search_error)}", exc_info=True)
+
+            if relevant_chapters_text:
+                context_parts['relevant_history'] = "【相关历史章节】\n" + "\n".join(relevant_chapters_text)
+                logger.info(f"  ✅ 相关历史：语义检索到{len(relevant_chapters_text)}章")
+
+        # 4. Recent summaries (chapters without a stored summary are skipped).
+        recent_summaries = [
+            f"第{chapter_info.chapter_number}章《{chapter_info.title}》：{summaries_by_chapter[chapter_info.id]}"
+            for chapter_info in recent_for_summary
+            if chapter_info.id in summaries_by_chapter
+        ]
+        if recent_summaries:
+            context_parts['recent_summary'] = "【近期章节概要】\n" + "\n".join(recent_summaries)
+            logger.info(f"  ✅ 近期概要：{len(recent_summaries)}章摘要")
+
+        # 5. Full text of the latest chapters (content is loaded only for these few).
+        recent_full_texts = []
+        for chapter_info in recent_full_chapters:
+            chapter_result = await db.execute(
+                select(Chapter.content)
+                .where(Chapter.id == chapter_info.id)
+            )
+            content = chapter_result.scalar_one_or_none()
+            if content:
+                recent_full_texts.append(
+                    f"=== 第{chapter_info.chapter_number}章：{chapter_info.title} ===\n{content}"
+                )
+
+        # Do not emit a bare section header when no text could be loaded.
+        if recent_full_texts:
+            context_parts['recent_full'] = "【最近章节完整内容】\n" + "\n\n".join(recent_full_texts)
+        logger.info(f"  ✅ 最近完整：{len(recent_full_texts)}章全文")
+
+        # 6. Statistics: every counter reflects what actually ended up in the context.
+        total_length = sum(
+            len(context_parts[section])
+            for section in ('story_skeleton', 'relevant_history', 'recent_summary', 'recent_full')
+        )
+        context_parts['stats'] = {
+            'total_previous': total_previous,
+            'skeleton_samples': len(skeleton_chapters),
+            'relevant_history': len(relevant_chapters_text),
+            'recent_summaries': len(recent_summaries),
+            'recent_full': len(recent_full_texts),
+            'total_length': total_length
+        }
+
+        logger.info(f"📊 智能上下文构建完成：总长度 {total_length} 字符")
+
+    except Exception as e:
+        # Best effort: return whatever sections were built before the failure.
+        logger.error(f"❌ 构建智能上下文失败: {str(e)}", exc_info=True)
+
+    return context_parts
+
+
@router.get("/{chapter_id}/can-generate", summary="检查章节是否可以生成")
 async def check_can_generate(
    chapter_id: str,
@@ -489,7 +681,8 @@ async def analyze_chapter_background(
            analysis=analysis_result,
            chapter_id=chapter_id,
            chapter_number=chapter.chapter_number,
-            chapter_content=chapter.content or ""
+            chapter_content=chapter.content or "",
+            chapter_title=chapter.title or ""
        )
        
        # 先删除该章节的旧记忆（写操作，需要锁）
@@ -742,27 +935,34 @@ async def generate_chapter_content_stream(
                else:
                    logger.info("未指定写作风格，使用原始提示词")
                
-                # 构建前置章节内容上下文（使用之前保存的数据）
+                # 🚀 使用智能上下文构建（支持海量章节）
+                smart_context = await build_smart_chapter_context(
+                    db=db_session,
+                    project_id=project.id,
+                    current_chapter_number=current_chapter.chapter_number,
+                    user_id=current_user_id
+                )
+                
+                # 组装上下文
                previous_content = ""
-                if previous_chapters_data:
-                    recent_chapters = previous_chapters_data[-3:] if len(previous_chapters_data) > 3 else previous_chapters_data
-                    early_chapters = previous_chapters_data[:-3] if len(previous_chapters_data) > 3 else []
+                if smart_context['story_skeleton']:
+                    previous_content += smart_context['story_skeleton'] + "\n\n"
+                if smart_context['relevant_history']:
+                    previous_content += smart_context['relevant_history'] + "\n\n"
+                if smart_context['recent_summary']:
+                    previous_content += smart_context['recent_summary'] + "\n\n"
+                if smart_context['recent_full']:
+                    previous_content += smart_context['recent_full']
                
-                    if early_chapters:
-                        early_summary = "【前期剧情概要】\n" + "\n".join([
-                            f"第{ch['chapter_number']}章《{ch['title']}》：{ch['content'][:200] if ch['content'] else ''}..."
-                            for ch in early_chapters
-                        ])
-                        previous_content += early_summary + "\n\n"
-                    
-                    if recent_chapters:
-                        recent_content = "【最近章节完整内容】\n" + "\n\n".join([
-                            f"=== 第{ch['chapter_number']}章：{ch['title']} ===\n{ch['content']}"
-                            for ch in recent_chapters
-                        ])
-                        previous_content += recent_content
-                    
-                    logger.info(f"构建前置上下文：{len(early_chapters)}章摘要 + {len(recent_chapters)}章完整内容")
+                # 日志输出统计信息
+                stats = smart_context['stats']
+                logger.info(f"📊 智能上下文统计:")
+                logger.info(f"  - 前置章节总数: {stats.get('total_previous', 0)}")
+                logger.info(f"  - 故事骨架采样: {stats.get('skeleton_samples', 0)}章")
+                logger.info(f"  - 相关历史检索: {stats.get('relevant_history', 0)}章")
+                logger.info(f"  - 近期章节概要: {stats.get('recent_summaries', 0)}章")
+                logger.info(f"  - 最近完整内容: {stats.get('recent_full', 0)}章")
+                logger.info(f"  - 上下文总长度: {stats.get('total_length', 0)}字符")
                
                # 🧠 构建记忆增强上下文
                logger.info(f"🧠 开始构建记忆增强上下文...")
@@ -477,6 +477,92 @@ async def _generate_new_outline(
    return OutlineListResponse(total=len(outlines), items=outlines)


+async def _build_smart_outline_context(
+    latest_outlines: List[Outline],
+    user_id: str,
+    project_id: str
+) -> dict:
+    """
+    Build a compressed context for outline continuation from the existing outlines.
+
+    Sections:
+    1. ``story_skeleton``: title of every 50th outline; only built when more
+       than 50 outlines exist.
+    2. ``recent_summary``: the last 20 outlines excluding the last 2, each as
+       title + the first 50 characters of its content.
+    3. ``recent_detail``: the last 2 outlines with their complete content.
+
+    Args:
+        latest_outlines: All existing outlines, in story order.
+        user_id: User ID (not used by this function; kept for the call signature).
+        project_id: Project ID (not used by this function; kept for the call signature).
+
+    Returns:
+        A dict with the three section strings (empty when a section does not
+        apply) and ``stats``. Every ``stats`` key, including ``total_length``,
+        is always present so callers can index it directly even if building
+        the context failed part-way; errors are logged, never raised.
+    """
+    total_count = len(latest_outlines)
+
+    context = {
+        'story_skeleton': '',      # sampled titles across the whole story
+        'recent_summary': '',      # title + short preview of recent outlines
+        'recent_detail': '',       # complete content of the latest outlines
+        'stats': {
+            'total': total_count,
+            'skeleton_samples': 0,
+            'recent_summaries': 0,
+            'recent_details': 0,
+            # Pre-seeded: callers read stats['total_length'] unconditionally.
+            'total_length': 0
+        }
+    }
+
+    try:
+        # 1. Story skeleton (one title every 50 outlines).
+        if total_count > 50:
+            skeleton_titles = [
+                f"第{o.order_index}章: {o.title}"
+                for o in latest_outlines[::50]
+            ]
+            context['story_skeleton'] = "【故事骨架】\n" + "\n".join(skeleton_titles)
+            context['stats']['skeleton_samples'] = len(skeleton_titles)
+            logger.info(f"  ✅ 故事骨架：采样{len(skeleton_titles)}章标题")
+
+        # 2. Recent summaries: last 20 outlines minus the 2 that are shown in full.
+        recent_for_summary = latest_outlines[-20:-2]
+        if recent_for_summary:
+            recent_summaries = []
+            for o in recent_for_summary:
+                # An outline may have no content yet; treat that as empty text.
+                outline_text = o.content or ""
+                ellipsis = "..." if len(outline_text) > 50 else ""
+                recent_summaries.append(
+                    f"第{o.order_index}章《{o.title}》: {outline_text[:50]}{ellipsis}"
+                )
+            context['recent_summary'] = "【近期大纲概要】\n" + "\n".join(recent_summaries)
+            context['stats']['recent_summaries'] = len(recent_summaries)
+            logger.info(f"  ✅ 近期概要：{len(recent_summaries)}章")
+
+        # 3. Recent detail: complete content of the last 2 outlines.
+        detail_texts = [
+            f"第{o.order_index}章《{o.title}》: {o.content or ''}"
+            for o in latest_outlines[-2:]
+        ]
+        # Do not emit a bare section header when there are no outlines at all.
+        if detail_texts:
+            context['recent_detail'] = "【最近大纲详情】\n" + "\n".join(detail_texts)
+        context['stats']['recent_details'] = len(detail_texts)
+        logger.info(f"  ✅ 最近详细：{len(detail_texts)}章")
+
+        # Total size of everything that will go into the prompt.
+        total_length = sum(
+            len(context[section])
+            for section in ('story_skeleton', 'recent_summary', 'recent_detail')
+        )
+        context['stats']['total_length'] = total_length
+        logger.info(f"📊 大纲上下文总长度: {total_length} 字符")
+
+    except Exception as e:
+        # Best effort: return whatever sections were built before the failure.
+        logger.error(f"❌ 构建智能大纲上下文失败: {str(e)}", exc_info=True)
+
+    return context
+
+
 async def _continue_outline(
    request: OutlineGenerateRequest,
    project: Project,
@@ -537,25 +623,35 @@ async def _continue_outline(
        )
        latest_outlines = latest_result.scalars().all()
        
-        # 获取最近2章的剧情
-        recent_outlines = latest_outlines[-2:] if len(latest_outlines) >= 2 else latest_outlines
-        recent_plot = "\n".join([
-            f"第{o.order_index}章《{o.title}》: {o.content}"
-            for o in recent_outlines
-        ])
+        # 🚀 使用智能上下文构建（支持海量大纲）
+        smart_context = await _build_smart_outline_context(
+            latest_outlines=latest_outlines,
+            user_id=user_id,
+            project_id=project.id
+        )
        
-        # 全部章节概览
-        all_chapters_brief = "\n".join([
-            f"第{o.order_index}章: {o.title}"
-            for o in latest_outlines
-        ])
+        # 组装上下文字符串
+        all_chapters_brief = ""
+        if smart_context['story_skeleton']:
+            all_chapters_brief += smart_context['story_skeleton'] + "\n\n"
+        if smart_context['recent_summary']:
+            all_chapters_brief += smart_context['recent_summary'] + "\n\n"
+        
+        # 最近详细内容作为 recent_plot
+        recent_plot = smart_context['recent_detail']
+        
+        # 日志统计
+        stats = smart_context['stats']
+        logger.info(f"📊 大纲上下文统计: 总数{stats['total']}, 骨架{stats['skeleton_samples']}, "
+                   f"概要{stats['recent_summaries']}, 详细{stats['recent_details']}, "
+                   f"长度{stats['total_length']}字符")
        
        # 🧠 构建记忆增强上下文（仅续写模式需要）
        memory_context = None
        try:
            logger.info(f"🧠 为第{batch_num + 1}批构建记忆上下文...")
            # 使用最近一章的大纲作为查询
-            query_outline = recent_outlines[-1].content if recent_outlines else ""
+            query_outline = latest_outlines[-1].content if latest_outlines else ""
            memory_context = await memory_service.build_context_for_generation(
                user_id=user_id,
                project_id=project.id,
@@ -952,18 +1048,28 @@ async def continue_outline_generator(
            )
            latest_outlines = latest_result.scalars().all()
            
-            # 获取最近2章的剧情
-            recent_outlines = latest_outlines[-2:] if len(latest_outlines) >= 2 else latest_outlines
-            recent_plot = "\n".join([
-                f"第{o.order_index}章《{o.title}》: {o.content}"
-                for o in recent_outlines
-            ])
+            # 🚀 使用智能上下文构建（支持海量大纲）
+            smart_context = await _build_smart_outline_context(
+                latest_outlines=latest_outlines,
+                user_id=user_id,
+                project_id=project_id
+            )
            
-            # 全部章节概览
-            all_chapters_brief = "\n".join([
-                f"第{o.order_index}章: {o.title}"
-                for o in latest_outlines
-            ])
+            # 组装上下文字符串
+            all_chapters_brief = ""
+            if smart_context['story_skeleton']:
+                all_chapters_brief += smart_context['story_skeleton'] + "\n\n"
+            if smart_context['recent_summary']:
+                all_chapters_brief += smart_context['recent_summary'] + "\n\n"
+            
+            # 最近详细内容作为 recent_plot
+            recent_plot = smart_context['recent_detail']
+            
+            # 日志统计
+            stats = smart_context['stats']
+            logger.info(f"📊 批次{batch_num + 1}大纲上下文: 总数{stats['total']}, "
+                       f"骨架{stats['skeleton_samples']}, 概要{stats['recent_summaries']}, "
+                       f"详细{stats['recent_details']}, 长度{stats['total_length']}字符")
            
            # 🧠 构建记忆增强上下文
            memory_context = None
@@ -972,7 +1078,7 @@ async def continue_outline_generator(
                    f"🧠 构建记忆上下文...",
                    batch_progress + 3
                )
-                query_outline = recent_outlines[-1].content if recent_outlines else ""
+                query_outline = latest_outlines[-1].content if latest_outlines else ""
                memory_context = await memory_service.build_context_for_generation(
                    user_id=user_id,
                    project_id=project_id,
@@ -298,7 +298,8 @@ class PlotAnalyzer:
        analysis: Dict[str, Any],
        chapter_id: str,
        chapter_number: int,
-        chapter_content: str = ""
+        chapter_content: str = "",
+        chapter_title: str = ""
    ) -> List[Dict[str, Any]]:
        """
        从分析结果中提取记忆片段
@@ -308,6 +309,7 @@ class PlotAnalyzer:
            chapter_id: 章节ID
            chapter_number: 章节号
            chapter_content: 章节完整内容(用于计算位置)
+            chapter_title: 章节标题
        
        Returns:
            记忆片段列表
@@ -315,6 +317,38 @@ class PlotAnalyzer:
        memories = []
        
        try:
+            # 【新增】0. 提取章节摘要作为记忆（用于语义检索相关章节）
+            chapter_summary = ""
+            
+            # 尝试从分析结果获取摘要
+            if analysis.get('summary'):
+                chapter_summary = analysis.get('summary')
+            # 或者从情节点组合生成摘要
+            elif analysis.get('plot_points'):
+                plot_summaries = [p.get('content', '') for p in analysis.get('plot_points', [])[:3]]
+                chapter_summary = "；".join(plot_summaries)
+            # 或者使用内容前300字
+            elif chapter_content:
+                chapter_summary = chapter_content[:300] + ("..." if len(chapter_content) > 300 else "")
+            
+            # 如果有摘要，添加到记忆中
+            if chapter_summary:
+                memories.append({
+                    'type': 'chapter_summary',
+                    'content': chapter_summary,
+                    'title': f"第{chapter_number}章《{chapter_title}》摘要",
+                    'metadata': {
+                        'chapter_id': chapter_id,
+                        'chapter_number': chapter_number,
+                        'importance_score': 0.6,  # 中等重要性
+                        'tags': ['摘要', '章节概览', chapter_title],
+                        'is_foreshadow': 0,
+                        'text_position': 0,
+                        'text_length': len(chapter_summary)
+                    }
+                })
+                logger.info(f"  ✅ 添加章节摘要记忆: {len(chapter_summary)}字")
+            
            # 1. 提取钩子作为记忆
            for i, hook in enumerate(analysis.get('hooks', [])):
                if hook.get('strength', 0) >= 6:  # 只保存强度>=6的钩子