diff --git a/backend/app/api/book_import.py b/backend/app/api/book_import.py index f7a6daf..b3beeb4 100644 --- a/backend/app/api/book_import.py +++ b/backend/app/api/book_import.py @@ -15,6 +15,7 @@ from app.schemas.book_import import ( BookImportPreviewResponse, BookImportRetryRequest, BookImportTaskCreateResponse, + BookImportTaskCreateRequest, BookImportTaskStatusResponse, ) from app.services.book_import_service import book_import_service @@ -33,6 +34,8 @@ async def create_book_import_task( project_id: str | None = Form(default=None, description="兼容参数:当前版本固定新建项目,不支持传入"), create_new_project: bool = Form(default=True, description="兼容参数:当前版本仅支持 true"), import_mode: str = Form(default="append", description="导入模式:append/overwrite"), + extract_mode: str = Form(default="tail", description="解析范围:tail=截取末章,full=整本"), + tail_chapter_count: int = Form(default=10, description="当 extract_mode=tail 时,截取末尾章节数,需为5的倍数;超过50按整本拆处理"), ): user_id = getattr(request.state, "user_id", None) if not user_id: @@ -44,11 +47,26 @@ async def create_book_import_task( if import_mode not in {"append", "overwrite"}: raise HTTPException(status_code=400, detail="import_mode 仅支持 append 或 overwrite") + if extract_mode not in {"tail", "full"}: + raise HTTPException(status_code=400, detail="extract_mode 仅支持 tail 或 full") + if tail_chapter_count < 5: + raise HTTPException(status_code=400, detail="tail_chapter_count 不能小于 5") + if tail_chapter_count % 5 != 0: + raise HTTPException(status_code=400, detail="tail_chapter_count 必须是 5 的倍数") + + if tail_chapter_count > 50: + extract_mode = "full" + if project_id: raise HTTPException(status_code=400, detail="当前仅支持新建项目导入,不支持指定 project_id") if not create_new_project: raise HTTPException(status_code=400, detail="当前仅支持新建项目导入") + create_payload = BookImportTaskCreateRequest( + extract_mode=extract_mode, + tail_chapter_count=tail_chapter_count, + ) + content = await file.read() if len(content) > MAX_TXT_SIZE: raise HTTPException(status_code=413, detail="文件大小超过 50MB 限制") @@ -60,6 +78,8 @@ async def create_book_import_task( project_id=None, create_new_project=True, import_mode=import_mode, + extract_mode=create_payload.extract_mode, + tail_chapter_count=create_payload.tail_chapter_count, ) return task diff --git a/backend/app/schemas/book_import.py b/backend/app/schemas/book_import.py index 38c3588..2186f62 100644 --- a/backend/app/schemas/book_import.py +++ b/backend/app/schemas/book_import.py @@ -9,6 +9,7 @@ TaskStatus = Literal["pending", "running", "completed", "failed", "cancelled"] ImportMode = Literal["append", "overwrite"] ExtractLevel = Literal["basic", "standard", "deep"] WarningLevel = Literal["info", "warning", "error"] +BookImportExtractMode = Literal["tail", "full"] class BookImportWarning(BaseModel): @@ -45,6 +46,12 @@ class BookImportOutline(BaseModel): structure: Optional[dict[str, Any]] = Field(None, description="结构化大纲(与系统大纲生成结构一致)") +class BookImportTaskCreateRequest(BaseModel): + """创建拆书任务请求""" + extract_mode: BookImportExtractMode = Field(default="tail", description="提取范围:tail=截取末章,full=整本") + tail_chapter_count: int = Field(default=10, ge=5, le=9999, description="当 extract_mode=tail 时,截取末尾章节数;需为5的倍数,超过50将按整本处理") + + class BookImportTaskCreateResponse(BaseModel): """创建任务响应""" task_id: str @@ -89,4 +96,4 @@ class BookImportApplyResponse(BaseModel): class BookImportRetryRequest(BaseModel): """重试失败步骤请求""" - steps: list[str] = Field(..., min_length=1, description="需要重试的步骤名列表,如 world_building / career_system / characters") \ No newline at end of file + steps: list[str] = Field(..., min_length=1, description="需要重试的步骤名列表,如 world_building / career_system / characters") diff --git a/backend/app/services/book_import_service.py b/backend/app/services/book_import_service.py index 260a972..c7f0335 100644 --- a/backend/app/services/book_import_service.py +++ b/backend/app/services/book_import_service.py @@ -34,6 +34,7 @@ from app.schemas.book_import import ( BookImportApplyRequest, BookImportApplyResponse, BookImportChapter, + BookImportExtractMode, BookImportOutline, BookImportPreviewResponse, BookImportTaskCreateResponse, @@ -65,6 +66,8 @@ class _BookImportTask: project_id: Optional[str] create_new_project: bool import_mode: str + extract_mode: BookImportExtractMode = "tail" + tail_chapter_count: int = 10 status: str = "pending" progress: int = 0 message: Optional[str] = "任务已创建" @@ -95,7 +98,16 @@ class BookImportService: project_id: Optional[str], create_new_project: bool, import_mode: str, + extract_mode: BookImportExtractMode = "tail", + tail_chapter_count: int = 10, ) -> BookImportTaskCreateResponse: + normalized_tail_count = max(5, int(tail_chapter_count)) + normalized_extract_mode = extract_mode + if normalized_tail_count % 5 != 0: + normalized_tail_count = ((normalized_tail_count + 4) // 5) * 5 + if normalized_tail_count > 50: + normalized_extract_mode = "full" + task_id = str(uuid.uuid4()) task = _BookImportTask( task_id=task_id, @@ -104,6 +116,8 @@ class BookImportService: project_id=project_id, create_new_project=create_new_project, import_mode=import_mode, + extract_mode=normalized_extract_mode, + tail_chapter_count=normalized_tail_count, ) async with self._tasks_lock: self._tasks[task_id] = task @@ -150,15 +164,17 @@ class BookImportService: } warnings = list(task.preview.warnings) if task.preview else [] - chapters_to_import, outlines_to_import, was_trimmed = self._trim_last_ten_for_apply( + chapters_to_import, outlines_to_import, was_trimmed = self._select_chapters_for_import( chapters=payload.chapters, outlines=payload.outlines, + extract_mode=task.extract_mode, + tail_chapter_count=task.tail_chapter_count, ) if was_trimmed: warnings.append( BookImportWarning( - code="apply_trimmed_to_last_ten", - message=f"导入阶段已强制仅保留最后 {len(chapters_to_import)} 章", + code="apply_trimmed_for_extract_mode", + message=f"导入阶段已按解析配置仅保留 {len(chapters_to_import)} 章", level="info", ) ) @@ -248,15 +264,17 @@ class BookImportService: } warnings = list(task.preview.warnings) if task.preview else [] - chapters_to_import, outlines_to_import, was_trimmed = self._trim_last_ten_for_apply( + chapters_to_import, outlines_to_import, was_trimmed = self._select_chapters_for_import( chapters=payload.chapters, outlines=payload.outlines, + extract_mode=task.extract_mode, + tail_chapter_count=task.tail_chapter_count, ) if was_trimmed: warnings.append( BookImportWarning( - code="apply_trimmed_to_last_ten", - message=f"导入阶段已强制仅保留最后 {len(chapters_to_import)} 章", + code="apply_trimmed_for_extract_mode", + message=f"导入阶段已按解析配置仅保留 {len(chapters_to_import)} 章", level="info", ) ) @@ -580,7 +598,7 @@ class BookImportService: return try: - # 进度分配:编码识别 5%,文本清洗 10%,章节切分 15%,截取末10章 18%,AI反向生成 20%-95%,完成 100% + # 进度分配:编码识别 5%,文本清洗 10%,章节切分 15%,按配置筛选章节 18%,AI反向生成 20%-95%,完成 100% self._set_task_state(task, status="running", progress=5, message="正在识别编码并读取文本...") self._check_cancelled(task) @@ -600,7 +618,7 @@ class BookImportService: ) self._check_cancelled(task) - self._set_task_state(task, status="running", progress=18, message="仅保留末10章并重建预览结构...") + self._set_task_state(task, status="running", progress=18, message="正在按解析配置筛选章节并构建预览...") preview = await self._build_preview( task=task, filename=task.filename, @@ -798,18 +816,26 @@ class BookImportService: return count, total_words - def _trim_last_ten_for_apply( + def _select_chapters_for_import( self, *, chapters: list[BookImportChapter], outlines: list[BookImportOutline], + extract_mode: BookImportExtractMode, + tail_chapter_count: int, ) -> tuple[list[BookImportChapter], list[BookImportOutline], bool]: if not chapters: return [], [], False sorted_chapters = sorted(chapters, key=lambda x: x.chapter_number) - selected = sorted_chapters[-10:] - was_trimmed = len(sorted_chapters) > len(selected) or len(outlines) > 10 + normalized_tail_count = max(5, int(tail_chapter_count)) + if normalized_tail_count > 50 or extract_mode == "full": + selected = sorted_chapters + else: + normalized_tail_count = min(normalized_tail_count, len(sorted_chapters)) + selected = sorted_chapters[-normalized_tail_count:] + + was_trimmed = len(sorted_chapters) > len(selected) normalized_chapters: list[BookImportChapter] = [] for idx, item in enumerate(selected, start=1): @@ -826,7 +852,10 @@ class BookImportService: normalized_outlines: list[BookImportOutline] = [] sorted_outlines = sorted(outlines, key=lambda x: x.order_index) if outlines else [] if sorted_outlines: - selected_outlines = sorted_outlines[-len(normalized_chapters):] + if extract_mode == "full": + selected_outlines = sorted_outlines[:len(normalized_chapters)] + else: + selected_outlines = sorted_outlines[-len(normalized_chapters):] for idx, item in enumerate(selected_outlines, start=1): normalized_outlines.append( BookImportOutline( @@ -853,6 +882,30 @@ class BookImportService: return normalized_chapters, normalized_outlines, was_trimmed + def _select_raw_chapters_for_preview( + self, + *, + chapters_data: list[dict], + extract_mode: BookImportExtractMode, + tail_chapter_count: int, + ) -> tuple[list[dict], bool]: + if not chapters_data: + return [], False + + normalized_tail_count = max(5, int(tail_chapter_count)) + if normalized_tail_count > 50 or extract_mode == "full": + return chapters_data, False + + normalized_tail_count = min(normalized_tail_count, len(chapters_data)) + + selected = chapters_data[-normalized_tail_count:] + return selected, len(selected) < len(chapters_data) + + def _get_extract_mode_label(self, extract_mode: BookImportExtractMode, selected_total: int) -> str: + if extract_mode == "full" or selected_total > 50: + return "整本" + return f"末{selected_total}章" + def _derive_world_settings( self, *, @@ -974,9 +1027,13 @@ class BookImportService: chapters: list[BookImportChapter] = [] warnings: list[BookImportWarning] = [] - # 仅保留最后10章用于最终导入,重建章节序号为 1..N - selected_chapters_raw = chapters_data[-10:] if len(chapters_data) > 10 else chapters_data + selected_chapters_raw, was_trimmed = self._select_raw_chapters_for_preview( + chapters_data=chapters_data, + extract_mode=task.extract_mode, + tail_chapter_count=task.tail_chapter_count, + ) selected_total = len(selected_chapters_raw) + selection_label = self._get_extract_mode_label(task.extract_mode, selected_total) title_counter: Counter[str] = Counter() for idx, chapter in enumerate(selected_chapters_raw, start=1): @@ -1020,7 +1077,7 @@ class BookImportService: task, status="running", progress=chapter_progress, - message=f"已处理末章 {idx}/{selected_total} 个章节结构...", + message=f"已处理{selection_label} {idx}/{selected_total} 个章节结构...", ) for title, count in title_counter.items(): @@ -1033,11 +1090,11 @@ class BookImportService: ) ) - if len(chapters_data) > selected_total: + if was_trimmed: warnings.append( BookImportWarning( - code="trimmed_to_last_ten_chapters", - message=f"已按规则仅保留最后 {selected_total} 章用于导入(原始识别 {len(chapters_data)} 章)", + code="trimmed_for_extract_mode", + message=f"已按解析配置仅保留{selection_label} {selected_total} 章用于导入(原始识别 {len(chapters_data)} 章)", level="info", ) ) diff --git a/frontend/src/pages/BookImport.tsx b/frontend/src/pages/BookImport.tsx index 6adcc51..ac64f64 100644 --- a/frontend/src/pages/BookImport.tsx +++ b/frontend/src/pages/BookImport.tsx @@ -28,6 +28,7 @@ import { InboxOutlined, PlayCircleOutlined, ReloadOutlined, StopOutlined, Warnin import { bookImportApi } from '../services/api'; import type { BookImportApplyPayload, + BookImportExtractMode, BookImportPreview, BookImportStepFailure, BookImportTask, @@ -111,6 +112,8 @@ export default function BookImport() { const { token } = theme.useToken(); const isMobile = window.innerWidth <= 768; const [file, setFile] = useState(null); + const [extractMode, setExtractMode] = useState('tail'); + const [tailChapterCount, setTailChapterCount] = useState(10); const [taskId, setTaskId] = useState(null); const [taskStatus, setTaskStatus] = useState(null); @@ -322,8 +325,13 @@ export default function BookImport() { setPreview(null); setTaskStatus(null); + const normalizedTailChapterCount = Math.max(5, Math.ceil(tailChapterCount / 5) * 5); + const normalizedExtractMode = normalizedTailChapterCount > 50 ? 'full' : extractMode; + const response = await bookImportApi.createTask({ file, + extract_mode: normalizedExtractMode, + tail_chapter_count: normalizedTailChapterCount, }); setTaskId(response.task_id); @@ -546,6 +554,8 @@ export default function BookImport() { setRetrying(false); setRetryProgress(0); setRetryMessage(''); + setExtractMode('tail'); + setTailChapterCount(10); message.success('已重新开始,请重新上传 TXT 并解析'); }, []); @@ -688,6 +698,38 @@ export default function BookImport() {

首版仅支持 .txt,建议不超过 50MB

+ + +