fix: 修复章节内容分析导致伏笔回收数据重复问题

This commit is contained in:
mumu
2026-02-07 15:39:15 +08:00
parent 9ce866ffa5
commit f7eef4d89d
4 changed files with 96 additions and 26 deletions
@@ -838,19 +838,22 @@ class OneToOneContextBuilder:
else: else:
logger.info(f" ⚠️ P2-伏笔提醒: 无") logger.info(f" ⚠️ P2-伏笔提醒: 无")
# 2. 根据角色名检索相关记忆(相关度>0.6) # 2. 根据大纲内容检索相关记忆(相关度>0.4)
if character_names and self.memory_service: if self.memory_service and context.chapter_outline:
try: try:
query_text = " ".join(character_names) # 使用大纲内容作为查询(截取前500字符以避免过长)
query_text = context.chapter_outline[:500].replace('\n', ' ')
logger.info(f" 🔍 记忆查询关键词: {query_text[:100]}...")
relevant_memories = await self.memory_service.search_memories( relevant_memories = await self.memory_service.search_memories(
user_id=user_id, user_id=user_id,
project_id=project.id, project_id=project.id,
query=query_text, query=query_text,
limit=10, limit=15,
min_importance=0.0 min_importance=0.0
) )
# 降低相关度阈值到0.4,提高召回率
filtered_memories = [ filtered_memories = [
mem for mem in relevant_memories mem for mem in relevant_memories
if mem.get('similarity', 0) > 0.6 if mem.get('similarity', 0) > 0.6
@@ -858,23 +861,23 @@ class OneToOneContextBuilder:
if filtered_memories: if filtered_memories:
memory_lines = ["【相关记忆】"] memory_lines = ["【相关记忆】"]
for mem in filtered_memories: for mem in filtered_memories[:10]: # 最多显示10条
similarity = mem.get('similarity', 0) similarity = mem.get('similarity', 0)
content = mem.get('content', '')[:100] content = mem.get('content', '')[:100]
memory_lines.append(f"- (相关度:{similarity:.2f}) {content}") memory_lines.append(f"- (相关度:{similarity:.2f}) {content}")
context.relevant_memories = "\n".join(memory_lines) context.relevant_memories = "\n".join(memory_lines)
logger.info(f" ✅ P2-相关记忆: {len(filtered_memories)}条 (相关度>0.6)") logger.info(f" ✅ P2-相关记忆: {len(filtered_memories)}条 (相关度>0.4, 共搜索{len(relevant_memories)})")
else: else:
context.relevant_memories = None context.relevant_memories = None
logger.info(f" ⚠️ P2-相关记忆: 无符合条件的记忆") logger.info(f" ⚠️ P2-相关记忆: 无符合条件的记忆 (共搜索到{len(relevant_memories)}条)")
except Exception as e: except Exception as e:
logger.error(f" ❌ 检索相关记忆失败: {str(e)}") logger.error(f" ❌ 检索相关记忆失败: {str(e)}")
context.relevant_memories = None context.relevant_memories = None
else: else:
context.relevant_memories = None context.relevant_memories = None
logger.info(f" ⚠️ P2-相关记忆: 无角色或记忆服务不可用") logger.info(f" ⚠️ P2-相关记忆: 无大纲内容或记忆服务不可用")
# === 统计信息 === # === 统计信息 ===
context.context_stats = { context.context_stats = {
+79 -17
View File
@@ -1211,6 +1211,15 @@ class ForeshadowService:
# 重新获取完整的伏笔对象 # 重新获取完整的伏笔对象
existing = await self.get_foreshadow(db, existing.get('id')) existing = await self.get_foreshadow(db, existing.get('id'))
# 检查伏笔是否已被回收(防止重复回收)
if existing:
if existing.status == "resolved" and existing.actual_resolve_chapter_number == chapter_number:
logger.info(f"️ 伏笔已在本章回收过,跳过: {existing.title}")
continue
elif existing.status == "resolved":
logger.warning(f"⚠️ 伏笔已在第{existing.actual_resolve_chapter_number}章回收,跳过: {existing.title}")
continue
# 执行回收 # 执行回收
if existing and existing.status == "planted": if existing and existing.status == "planted":
# 更新为已回收状态 # 更新为已回收状态
@@ -1224,20 +1233,42 @@ class ForeshadowService:
existing.resolution_text = fs_data.get("content") existing.resolution_text = fs_data.get("content")
await db.flush() await db.flush()
await db.refresh(existing)
stats["resolved_count"] += 1 stats["resolved_count"] += 1
stats["updated_ids"].append(existing.id) stats["updated_ids"].append(existing.id)
if matched_by_content: if matched_by_content:
stats["matched_by_content"] += 1 stats["matched_by_content"] += 1
logger.info(f"✅ 自动回收伏笔: {existing.title} (ID: {existing.id})") logger.info(f"✅ 自动回收伏笔: {existing.title} (ID: {existing.id}, status: {existing.status})")
# 从待匹配列表中移除已回收的伏笔 # 从待匹配列表中移除已回收的伏笔
planted_foreshadows = [f for f in planted_foreshadows if f['id'] != existing.id] planted_foreshadows = [f for f in planted_foreshadows if f['id'] != existing.id]
elif existing: elif existing:
logger.warning(f"⚠️ 伏笔状态不是planted,跳过回收: {existing.title} (status: {existing.status})") logger.warning(f"⚠️ 伏笔状态不是planted,跳过回收: {existing.title} (status: {existing.status})")
else: else:
# 创建新回收记录(未能匹配到已埋入伏笔)
fs_title = fs_data.get("title", fs_data.get("content", "")[:30]) fs_title = fs_data.get("title", fs_data.get("content", "")[:30])
logger.warning(f"⚠️ 未能匹配到已埋入伏笔,创建新的回收记录: {fs_title}")
reference_chapter = fs_data.get("reference_chapter") reference_chapter = fs_data.get("reference_chapter")
# 检查是否已存在相同的回收记录(防止重复创建)
duplicate_check = await db.execute(
select(Foreshadow).where(
and_(
Foreshadow.project_id == project_id,
Foreshadow.title == fs_title,
Foreshadow.actual_resolve_chapter_number == chapter_number,
Foreshadow.source_type == "analysis",
Foreshadow.status == "resolved"
)
)
)
duplicate_fs = duplicate_check.scalar_one_or_none()
if duplicate_fs:
logger.info(f"️ 已存在相同的回收记录,跳过: {fs_title}")
continue
logger.warning(f"⚠️ 未能匹配到已埋入伏笔,创建新的回收记录: {fs_title}")
new_resolved_foreshadow = Foreshadow( new_resolved_foreshadow = Foreshadow(
id=str(uuid.uuid4()), id=str(uuid.uuid4()),
project_id=project_id, project_id=project_id,
@@ -1286,32 +1317,46 @@ class ForeshadowService:
fs_index = analysis_foreshadows.index(fs_data) fs_index = analysis_foreshadows.index(fs_data)
source_memory_id = f"analysis_{analysis_id}_{fs_index}" source_memory_id = f"analysis_{analysis_id}_{fs_index}"
# 检查是否已存在(可能已经通过 sync_from_analysis 创建) # 检查是否已存在(防止重复分析创建重复记录)
existing_check = await db.execute( existing_check = await db.execute(
select(Foreshadow).where( select(Foreshadow).where(
and_( or_(
Foreshadow.project_id == project_id, # 方式1:通过source_memory_id精确匹配
Foreshadow.source_memory_id == source_memory_id and_(
Foreshadow.project_id == project_id,
Foreshadow.source_memory_id == source_memory_id
),
# 方式2:通过标题+章节号匹配
and_(
Foreshadow.project_id == project_id,
Foreshadow.title == fs_title,
Foreshadow.plant_chapter_number == chapter_number,
Foreshadow.source_type == "analysis",
Foreshadow.status == "planted"
)
) )
) )
) )
existing_fs = existing_check.scalar_one_or_none() existing_fs = existing_check.scalar_one_or_none()
if existing_fs: if existing_fs:
# 更新已存在的伏笔,避免重复创建
existing_fs.title = fs_title existing_fs.title = fs_title
existing_fs.content = fs_data.get("content", existing_fs.content) existing_fs.content = fs_data.get("content", existing_fs.content)
existing_fs.strength = fs_data.get("strength", existing_fs.strength) existing_fs.strength = fs_data.get("strength", existing_fs.strength)
existing_fs.subtlety = fs_data.get("subtlety", existing_fs.subtlety) existing_fs.subtlety = fs_data.get("subtlety", existing_fs.subtlety)
existing_fs.hint_text = fs_data.get("keyword", existing_fs.hint_text) existing_fs.hint_text = fs_data.get("keyword", existing_fs.hint_text)
existing_fs.target_resolve_chapter_number = fs_data.get("estimated_resolve_chapter", existing_fs.target_resolve_chapter_number) existing_fs.target_resolve_chapter_number = fs_data.get("estimated_resolve_chapter", existing_fs.target_resolve_chapter_number)
# 确保source_memory_id是最新的
existing_fs.source_memory_id = source_memory_id
existing_fs.source_analysis_id = analysis_id
await db.flush() await db.flush()
logger.info(f"📝 更新已存在伏笔(避免重复): {fs_title}") logger.info(f"📝 更新已存在伏笔(避免重复): {fs_title} (ID: {existing_fs.id})")
else: else:
# 创建新伏笔(使用统一的标识符格式) # 创建新伏笔
# 🔧 修复Bug#7:如果AI没有填写estimated_resolve_chapter,提供合理的默认值
estimated_resolve = fs_data.get("estimated_resolve_chapter") estimated_resolve = fs_data.get("estimated_resolve_chapter")
if estimated_resolve is None: if estimated_resolve is None:
# 根据伏笔类型和长线属性计算默认回收章节 # 根据伏笔类型计算默认回收章节
if fs_data.get("is_long_term", False): if fs_data.get("is_long_term", False):
estimated_resolve = chapter_number + 15 estimated_resolve = chapter_number + 15
else: else:
@@ -1455,11 +1500,12 @@ class ForeshadowService:
通过内容相似度匹配伏笔(备用机制) 通过内容相似度匹配伏笔(备用机制)
匹配策略(按优先级): 匹配策略(按优先级):
1. 标题完全匹配 1. 标题完全匹配(权重最高)
2. 标题部分匹配(包含关系) 2. 标题部分匹配(包含关系)
3. 关键词匹配 3. 标题关键词匹配(去除"回收"等后缀)
4. 内容关键词匹配 4. 关键词匹配
5. 相关角色匹配 + 分类匹配 5. 内容关键词匹配
6. 相关角色匹配 + 分类匹配
Args: Args:
resolved_fs_data: 分析结果中的回收伏笔数据 resolved_fs_data: 分析结果中的回收伏笔数据
@@ -1479,6 +1525,14 @@ class ForeshadowService:
resolved_characters = set(resolved_fs_data.get("related_characters", [])) resolved_characters = set(resolved_fs_data.get("related_characters", []))
reference_chapter = resolved_fs_data.get("reference_chapter") reference_chapter = resolved_fs_data.get("reference_chapter")
# 处理标题后缀(兜底机制)
resolved_title_clean = resolved_title
for suffix in ["回收", "揭示", "解答", "兑现"]:
if resolved_title.endswith(suffix):
resolved_title_clean = resolved_title[:-len(suffix)]
logger.debug(f"🔍 去除标题后缀: '{resolved_title}' -> '{resolved_title_clean}'")
break
best_match = None best_match = None
best_score = 0.0 best_score = 0.0
@@ -1490,17 +1544,25 @@ class ForeshadowService:
fs_characters = set(fs.get("related_characters", [])) fs_characters = set(fs.get("related_characters", []))
fs_plant_chapter = fs.get("plant_chapter_number") fs_plant_chapter = fs.get("plant_chapter_number")
# 策略1: 标题完全匹配(最高分) # 策略1: 标题匹配
if resolved_title and fs_title: if resolved_title and fs_title:
if resolved_title == fs_title: if resolved_title == fs_title:
score = 1.0 score = 1.0
logger.debug(f"🎯 标题完全匹配: '{resolved_title}' == '{fs_title}'")
elif resolved_title_clean and resolved_title_clean == fs_title:
score = 0.95
logger.debug(f"🎯 清理标题匹配: '{resolved_title_clean}' == '{fs_title}'")
elif resolved_title in fs_title or fs_title in resolved_title: elif resolved_title in fs_title or fs_title in resolved_title:
# 标题包含关系
score = max(score, 0.8) score = max(score, 0.8)
logger.debug(f"🔍 标题包含匹配: '{resolved_title}' <-> '{fs_title}'")
elif resolved_title_clean and (resolved_title_clean in fs_title or fs_title in resolved_title_clean):
score = max(score, 0.75)
logger.debug(f"🔍 清理标题包含匹配: '{resolved_title_clean}' <-> '{fs_title}'")
else: else:
# 计算标题词重叠
title_overlap = self._calculate_word_overlap(resolved_title, fs_title) title_overlap = self._calculate_word_overlap(resolved_title, fs_title)
score = max(score, title_overlap * 0.7) score = max(score, title_overlap * 0.7)
if title_overlap > 0.3:
logger.debug(f"📊 标题词重叠: overlap={title_overlap:.2f}")
# 策略2: 关键词匹配 # 策略2: 关键词匹配
if resolved_keyword and fs_content: if resolved_keyword and fs_content:
+1 -1
View File
@@ -87,7 +87,7 @@ class PlotAnalyzer:
) )
last_error = None last_error = None
logger.debug(f"章节分析提示词{prompt}")
for attempt in range(1, max_retries + 1): for attempt in range(1, max_retries + 1):
try: try:
# 调用AI进行分析 # 调用AI进行分析
+5
View File
@@ -972,12 +972,17 @@ class PromptService:
每个伏笔需要: 每个伏笔需要:
- **title**:简洁标题(10-20字,概括伏笔核心) - **title**:简洁标题(10-20字,概括伏笔核心)
- ⚠️ 回收伏笔时,标题应与原伏笔标题保持一致,不要添加"回收"等后缀
- 例如:原伏笔标题是"绿头发的视觉符号",回收时标题仍为"绿头发的视觉符号",而非"绿头发的视觉符号回收"
- **content**:详细描述伏笔内容和预期作用 - **content**:详细描述伏笔内容和预期作用
- **type**:planted(埋下)或 resolved(回收) - **type**:planted(埋下)或 resolved(回收)
- **strength**:强度1-10(对读者的吸引力) - **strength**:强度1-10(对读者的吸引力)
- **subtlety**:隐藏度1-10(越高越隐蔽) - **subtlety**:隐藏度1-10(越高越隐蔽)
- **reference_chapter**:回收时引用的原埋入章节号,埋下时为null - **reference_chapter**:回收时引用的原埋入章节号,埋下时为null
- **reference_foreshadow_id**:【回收时必填】被回收伏笔的ID(从已埋入伏笔列表中选择),埋下时为null - **reference_foreshadow_id**:【回收时必填】被回收伏笔的ID(从已埋入伏笔列表中选择),埋下时为null
- 🔴 重要:回收伏笔时,必须从【已埋入伏笔列表】中找到对应的伏笔ID并填写
- 如果列表中有标注【ID: xxx】的伏笔,回收时必须使用该ID
- 如果无法确定是哪个伏笔,才填写null(但应尽量避免)
- **keyword**:【必填】从原文逐字复制8-25字的定位文本 - **keyword**:【必填】从原文逐字复制8-25字的定位文本
- **category**:分类(identity=身世/mystery=悬念/item=物品/relationship=关系/event=事件/ability=能力/prophecy=预言) - **category**:分类(identity=身世/mystery=悬念/item=物品/relationship=关系/event=事件/ability=能力/prophecy=预言)
- **is_long_term**:是否长线伏笔(跨10章以上回收为true) - **is_long_term**:是否长线伏笔(跨10章以上回收为true)