feature: 新增支持拆书自选保留章节数,或者整本拆书功能
This commit is contained in:
@@ -15,6 +15,7 @@ from app.schemas.book_import import (
|
||||
BookImportPreviewResponse,
|
||||
BookImportRetryRequest,
|
||||
BookImportTaskCreateResponse,
|
||||
BookImportTaskCreateRequest,
|
||||
BookImportTaskStatusResponse,
|
||||
)
|
||||
from app.services.book_import_service import book_import_service
|
||||
@@ -33,6 +34,8 @@ async def create_book_import_task(
|
||||
project_id: str | None = Form(default=None, description="兼容参数:当前版本固定新建项目,不支持传入"),
|
||||
create_new_project: bool = Form(default=True, description="兼容参数:当前版本仅支持 true"),
|
||||
import_mode: str = Form(default="append", description="导入模式:append/overwrite"),
|
||||
extract_mode: str = Form(default="tail", description="解析范围:tail=截取末章,full=整本"),
|
||||
tail_chapter_count: int = Form(default=10, description="当 extract_mode=tail 时,截取末尾章节数,需为5的倍数;超过50按整本拆处理"),
|
||||
):
|
||||
user_id = getattr(request.state, "user_id", None)
|
||||
if not user_id:
|
||||
@@ -44,11 +47,26 @@ async def create_book_import_task(
|
||||
if import_mode not in {"append", "overwrite"}:
|
||||
raise HTTPException(status_code=400, detail="import_mode 仅支持 append 或 overwrite")
|
||||
|
||||
if extract_mode not in {"tail", "full"}:
|
||||
raise HTTPException(status_code=400, detail="extract_mode 仅支持 tail 或 full")
|
||||
if tail_chapter_count < 5:
|
||||
raise HTTPException(status_code=400, detail="tail_chapter_count 不能小于 5")
|
||||
if tail_chapter_count % 5 != 0:
|
||||
raise HTTPException(status_code=400, detail="tail_chapter_count 必须是 5 的倍数")
|
||||
|
||||
if tail_chapter_count > 50:
|
||||
extract_mode = "full"
|
||||
|
||||
if project_id:
|
||||
raise HTTPException(status_code=400, detail="当前仅支持新建项目导入,不支持指定 project_id")
|
||||
if not create_new_project:
|
||||
raise HTTPException(status_code=400, detail="当前仅支持新建项目导入")
|
||||
|
||||
create_payload = BookImportTaskCreateRequest(
|
||||
extract_mode=extract_mode,
|
||||
tail_chapter_count=tail_chapter_count,
|
||||
)
|
||||
|
||||
content = await file.read()
|
||||
if len(content) > MAX_TXT_SIZE:
|
||||
raise HTTPException(status_code=413, detail="文件大小超过 50MB 限制")
|
||||
@@ -60,6 +78,8 @@ async def create_book_import_task(
|
||||
project_id=None,
|
||||
create_new_project=True,
|
||||
import_mode=import_mode,
|
||||
extract_mode=create_payload.extract_mode,
|
||||
tail_chapter_count=create_payload.tail_chapter_count,
|
||||
)
|
||||
return task
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ TaskStatus = Literal["pending", "running", "completed", "failed", "cancelled"]
|
||||
ImportMode = Literal["append", "overwrite"]
|
||||
ExtractLevel = Literal["basic", "standard", "deep"]
|
||||
WarningLevel = Literal["info", "warning", "error"]
|
||||
BookImportExtractMode = Literal["tail", "full"]
|
||||
|
||||
|
||||
class BookImportWarning(BaseModel):
|
||||
@@ -45,6 +46,12 @@ class BookImportOutline(BaseModel):
|
||||
structure: Optional[dict[str, Any]] = Field(None, description="结构化大纲(与系统大纲生成结构一致)")
|
||||
|
||||
|
||||
class BookImportTaskCreateRequest(BaseModel):
    """Request payload for creating a book-import (book-splitting) task.

    Attributes are documented with comments below; the ``description``
    strings are emitted in the generated schema and are left untouched.
    """

    # Which part of the book to analyse: "tail" keeps only the trailing
    # chapters, "full" keeps the whole book.
    extract_mode: BookImportExtractMode = Field(default="tail", description="提取范围:tail=截取末章,full=整本")

    # Number of trailing chapters to keep when extract_mode is "tail".
    # multiple_of=5 enforces the "must be a multiple of 5" rule that the
    # description already states, instead of leaving it as documentation only.
    # Per the description, values above 50 are handled as a whole-book import.
    tail_chapter_count: int = Field(default=10, ge=5, le=9999, multiple_of=5, description="当 extract_mode=tail 时,截取末尾章节数;需为5的倍数,超过50将按整本处理")
|
||||
|
||||
|
||||
class BookImportTaskCreateResponse(BaseModel):
|
||||
"""创建任务响应"""
|
||||
task_id: str
|
||||
@@ -89,4 +96,4 @@ class BookImportApplyResponse(BaseModel):
|
||||
|
||||
class BookImportRetryRequest(BaseModel):
    """Request payload for retrying failed import steps."""

    # Names of the steps to retry; at least one is required. The field was
    # previously declared twice with identical definitions — a single
    # declaration is kept.
    steps: list[str] = Field(..., min_length=1, description="需要重试的步骤名列表,如 world_building / career_system / characters")
|
||||
|
||||
@@ -34,6 +34,7 @@ from app.schemas.book_import import (
|
||||
BookImportApplyRequest,
|
||||
BookImportApplyResponse,
|
||||
BookImportChapter,
|
||||
BookImportExtractMode,
|
||||
BookImportOutline,
|
||||
BookImportPreviewResponse,
|
||||
BookImportTaskCreateResponse,
|
||||
@@ -65,6 +66,8 @@ class _BookImportTask:
|
||||
project_id: Optional[str]
|
||||
create_new_project: bool
|
||||
import_mode: str
|
||||
extract_mode: BookImportExtractMode = "tail"
|
||||
tail_chapter_count: int = 10
|
||||
status: str = "pending"
|
||||
progress: int = 0
|
||||
message: Optional[str] = "任务已创建"
|
||||
@@ -95,7 +98,16 @@ class BookImportService:
|
||||
project_id: Optional[str],
|
||||
create_new_project: bool,
|
||||
import_mode: str,
|
||||
extract_mode: BookImportExtractMode = "tail",
|
||||
tail_chapter_count: int = 10,
|
||||
) -> BookImportTaskCreateResponse:
|
||||
normalized_tail_count = max(5, int(tail_chapter_count))
|
||||
normalized_extract_mode = extract_mode
|
||||
if normalized_tail_count % 5 != 0:
|
||||
normalized_tail_count = ((normalized_tail_count + 4) // 5) * 5
|
||||
if normalized_tail_count > 50:
|
||||
normalized_extract_mode = "full"
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
task = _BookImportTask(
|
||||
task_id=task_id,
|
||||
@@ -104,6 +116,8 @@ class BookImportService:
|
||||
project_id=project_id,
|
||||
create_new_project=create_new_project,
|
||||
import_mode=import_mode,
|
||||
extract_mode=normalized_extract_mode,
|
||||
tail_chapter_count=normalized_tail_count,
|
||||
)
|
||||
async with self._tasks_lock:
|
||||
self._tasks[task_id] = task
|
||||
@@ -150,15 +164,17 @@ class BookImportService:
|
||||
}
|
||||
|
||||
warnings = list(task.preview.warnings) if task.preview else []
|
||||
chapters_to_import, outlines_to_import, was_trimmed = self._trim_last_ten_for_apply(
|
||||
chapters_to_import, outlines_to_import, was_trimmed = self._select_chapters_for_import(
|
||||
chapters=payload.chapters,
|
||||
outlines=payload.outlines,
|
||||
extract_mode=task.extract_mode,
|
||||
tail_chapter_count=task.tail_chapter_count,
|
||||
)
|
||||
if was_trimmed:
|
||||
warnings.append(
|
||||
BookImportWarning(
|
||||
code="apply_trimmed_to_last_ten",
|
||||
message=f"导入阶段已强制仅保留最后 {len(chapters_to_import)} 章",
|
||||
code="apply_trimmed_for_extract_mode",
|
||||
message=f"导入阶段已按解析配置仅保留 {len(chapters_to_import)} 章",
|
||||
level="info",
|
||||
)
|
||||
)
|
||||
@@ -248,15 +264,17 @@ class BookImportService:
|
||||
}
|
||||
|
||||
warnings = list(task.preview.warnings) if task.preview else []
|
||||
chapters_to_import, outlines_to_import, was_trimmed = self._trim_last_ten_for_apply(
|
||||
chapters_to_import, outlines_to_import, was_trimmed = self._select_chapters_for_import(
|
||||
chapters=payload.chapters,
|
||||
outlines=payload.outlines,
|
||||
extract_mode=task.extract_mode,
|
||||
tail_chapter_count=task.tail_chapter_count,
|
||||
)
|
||||
if was_trimmed:
|
||||
warnings.append(
|
||||
BookImportWarning(
|
||||
code="apply_trimmed_to_last_ten",
|
||||
message=f"导入阶段已强制仅保留最后 {len(chapters_to_import)} 章",
|
||||
code="apply_trimmed_for_extract_mode",
|
||||
message=f"导入阶段已按解析配置仅保留 {len(chapters_to_import)} 章",
|
||||
level="info",
|
||||
)
|
||||
)
|
||||
@@ -580,7 +598,7 @@ class BookImportService:
|
||||
return
|
||||
|
||||
try:
|
||||
# 进度分配:编码识别 5%,文本清洗 10%,章节切分 15%,截取末10章 18%,AI反向生成 20%-95%,完成 100%
|
||||
# 进度分配:编码识别 5%,文本清洗 10%,章节切分 15%,按配置筛选章节 18%,AI反向生成 20%-95%,完成 100%
|
||||
self._set_task_state(task, status="running", progress=5, message="正在识别编码并读取文本...")
|
||||
self._check_cancelled(task)
|
||||
|
||||
@@ -600,7 +618,7 @@ class BookImportService:
|
||||
)
|
||||
self._check_cancelled(task)
|
||||
|
||||
self._set_task_state(task, status="running", progress=18, message="仅保留末10章并重建预览结构...")
|
||||
self._set_task_state(task, status="running", progress=18, message="正在按解析配置筛选章节并构建预览...")
|
||||
preview = await self._build_preview(
|
||||
task=task,
|
||||
filename=task.filename,
|
||||
@@ -798,18 +816,26 @@ class BookImportService:
|
||||
|
||||
return count, total_words
|
||||
|
||||
def _trim_last_ten_for_apply(
|
||||
def _select_chapters_for_import(
|
||||
self,
|
||||
*,
|
||||
chapters: list[BookImportChapter],
|
||||
outlines: list[BookImportOutline],
|
||||
extract_mode: BookImportExtractMode,
|
||||
tail_chapter_count: int,
|
||||
) -> tuple[list[BookImportChapter], list[BookImportOutline], bool]:
|
||||
if not chapters:
|
||||
return [], [], False
|
||||
|
||||
sorted_chapters = sorted(chapters, key=lambda x: x.chapter_number)
|
||||
selected = sorted_chapters[-10:]
|
||||
was_trimmed = len(sorted_chapters) > len(selected) or len(outlines) > 10
|
||||
normalized_tail_count = max(5, int(tail_chapter_count))
|
||||
if normalized_tail_count > 50 or extract_mode == "full":
|
||||
selected = sorted_chapters
|
||||
else:
|
||||
normalized_tail_count = min(normalized_tail_count, len(sorted_chapters))
|
||||
selected = sorted_chapters[-normalized_tail_count:]
|
||||
|
||||
was_trimmed = len(sorted_chapters) > len(selected)
|
||||
|
||||
normalized_chapters: list[BookImportChapter] = []
|
||||
for idx, item in enumerate(selected, start=1):
|
||||
@@ -826,7 +852,10 @@ class BookImportService:
|
||||
normalized_outlines: list[BookImportOutline] = []
|
||||
sorted_outlines = sorted(outlines, key=lambda x: x.order_index) if outlines else []
|
||||
if sorted_outlines:
|
||||
selected_outlines = sorted_outlines[-len(normalized_chapters):]
|
||||
if extract_mode == "full":
|
||||
selected_outlines = sorted_outlines[:len(normalized_chapters)]
|
||||
else:
|
||||
selected_outlines = sorted_outlines[-len(normalized_chapters):]
|
||||
for idx, item in enumerate(selected_outlines, start=1):
|
||||
normalized_outlines.append(
|
||||
BookImportOutline(
|
||||
@@ -853,6 +882,30 @@ class BookImportService:
|
||||
|
||||
return normalized_chapters, normalized_outlines, was_trimmed
|
||||
|
||||
def _select_raw_chapters_for_preview(
|
||||
self,
|
||||
*,
|
||||
chapters_data: list[dict],
|
||||
extract_mode: BookImportExtractMode,
|
||||
tail_chapter_count: int,
|
||||
) -> tuple[list[dict], bool]:
|
||||
if not chapters_data:
|
||||
return [], False
|
||||
|
||||
normalized_tail_count = max(5, int(tail_chapter_count))
|
||||
if normalized_tail_count > 50 or extract_mode == "full":
|
||||
return chapters_data, False
|
||||
|
||||
normalized_tail_count = min(normalized_tail_count, len(chapters_data))
|
||||
|
||||
selected = chapters_data[-normalized_tail_count:]
|
||||
return selected, len(selected) < len(chapters_data)
|
||||
|
||||
def _get_extract_mode_label(self, extract_mode: BookImportExtractMode, selected_total: int) -> str:
|
||||
if extract_mode == "full" or selected_total > 50:
|
||||
return "整本"
|
||||
return f"末{selected_total}章"
|
||||
|
||||
def _derive_world_settings(
|
||||
self,
|
||||
*,
|
||||
@@ -974,9 +1027,13 @@ class BookImportService:
|
||||
chapters: list[BookImportChapter] = []
|
||||
warnings: list[BookImportWarning] = []
|
||||
|
||||
# 仅保留最后10章用于最终导入,重建章节序号为 1..N
|
||||
selected_chapters_raw = chapters_data[-10:] if len(chapters_data) > 10 else chapters_data
|
||||
selected_chapters_raw, was_trimmed = self._select_raw_chapters_for_preview(
|
||||
chapters_data=chapters_data,
|
||||
extract_mode=task.extract_mode,
|
||||
tail_chapter_count=task.tail_chapter_count,
|
||||
)
|
||||
selected_total = len(selected_chapters_raw)
|
||||
selection_label = self._get_extract_mode_label(task.extract_mode, selected_total)
|
||||
|
||||
title_counter: Counter[str] = Counter()
|
||||
for idx, chapter in enumerate(selected_chapters_raw, start=1):
|
||||
@@ -1020,7 +1077,7 @@ class BookImportService:
|
||||
task,
|
||||
status="running",
|
||||
progress=chapter_progress,
|
||||
message=f"已处理末章 {idx}/{selected_total} 个章节结构...",
|
||||
message=f"已处理{selection_label} {idx}/{selected_total} 个章节结构...",
|
||||
)
|
||||
|
||||
for title, count in title_counter.items():
|
||||
@@ -1033,11 +1090,11 @@ class BookImportService:
|
||||
)
|
||||
)
|
||||
|
||||
if len(chapters_data) > selected_total:
|
||||
if was_trimmed:
|
||||
warnings.append(
|
||||
BookImportWarning(
|
||||
code="trimmed_to_last_ten_chapters",
|
||||
message=f"已按规则仅保留最后 {selected_total} 章用于导入(原始识别 {len(chapters_data)} 章)",
|
||||
code="trimmed_for_extract_mode",
|
||||
message=f"已按解析配置仅保留{selection_label} {selected_total} 章用于导入(原始识别 {len(chapters_data)} 章)",
|
||||
level="info",
|
||||
)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user