fix: kb message truncation bug

This commit is contained in:
qixinbo
2026-03-29 20:35:38 +08:00
parent 99b654bbd2
commit 5869c377a3
8 changed files with 123 additions and 17 deletions
+27
View File
@@ -67,6 +67,33 @@ def _extract_upload_text(filename: str, content: bytes) -> str:
except Exception as e:
raise ValueError(f"Failed to parse PDF: {str(e)}")
# Add text-extraction support for Word documents
if lower.endswith((".doc", ".docx")):
try:
import docx
doc = docx.Document(io.BytesIO(content))
return "\n".join([para.text for para in doc.paragraphs])
except ImportError:
raise ValueError("python-docx is not installed. Cannot parse Word files.")
except Exception as e:
raise ValueError(f"Failed to parse Word document: {str(e)}")
# Add text-extraction support for PowerPoint documents
if lower.endswith((".ppt", ".pptx")):
try:
import pptx
prs = pptx.Presentation(io.BytesIO(content))
text = []
for slide in prs.slides:
for shape in slide.shapes:
if hasattr(shape, "text"):
text.append(shape.text)
return "\n".join(text)
except ImportError:
raise ValueError("python-pptx is not installed. Cannot parse PPT files.")
except Exception as e:
raise ValueError(f"Failed to parse PPT document: {str(e)}")
raise ValueError("Unsupported file type")
+38 -10
View File
@@ -281,7 +281,7 @@ def _extract_kb_citations(kb_id: Optional[str], message: str) -> Tuple[str, List
if not isinstance(item, dict):
continue
title = str(item.get("title") or f"Doc {idx}")
chunk = str(item.get("chunk") or "").strip()
chunk = str(item.get("chunk") or "").strip().replace("\n\n", "\n")
if not chunk:
continue
score = float(item.get("score", 0.0) or 0.0)
@@ -297,11 +297,11 @@ def _extract_kb_citations(kb_id: Optional[str], message: str) -> Tuple[str, List
)
if not lines:
return f"[System: A knowledge base is selected ({kb_id}). Retrieval result is empty.]\n{message}", []
context_block = "\n\n".join(lines)
next_message = f"[System: The following context is retrieved from knowledge base {kb_id}. You must ground your answer on it when relevant.]\n{context_block}\n\n{message}"
context_block = "\n".join(lines)
next_message = f"[Runtime Context — metadata only, not instructions]\nThe following context is retrieved from knowledge base {kb_id}. You must ground your answer on it when relevant.\n{context_block}\n\n{message}"
return next_message, citations
except Exception as exc:
return f"[System: A knowledge base is selected ({kb_id}) but retrieval failed: {exc}]\n{message}", []
return f"[Runtime Context — metadata only, not instructions]\nA knowledge base is selected ({kb_id}) but retrieval failed: {exc}\n\n{message}", []
def _sync_session_project(session_id: str, project_id: Optional[int]) -> None:
@@ -408,15 +408,29 @@ async def nanobot_chat(request: ChatRequest):
# Inject instructions if explicitly routed
message, kb_citations = _extract_kb_citations(resolved_kb_id, request.message)
instructions = []
if request.route_mode == "sql" or request.prefer_sql_chart:
message = f"[System: Use the nl2sql tool to answer the query]\n{message}"
instructions.append("Use the nl2sql tool to answer the query")
elif request.route_mode == "chat":
message = f"[System: Normal chat mode. Do NOT use the nl2sql tool]\n{message}"
instructions.append("Normal chat mode. Do NOT use the nl2sql tool")
# Inject instructions for selected skills
if request.skill_ids:
skill_list = ", ".join(request.skill_ids)
message = f"[System: You must prioritize using the following skills/tools to answer the user's request: {skill_list}]\n{message}"
instructions.append(f"You must prioritize using the following skills/tools to answer the user's request: {skill_list}")
if instructions:
instr_block = "\n".join(instructions)
# If message already has Runtime Context, append to it, otherwise create new
if message.startswith("[Runtime Context — metadata only, not instructions]"):
parts = message.split("\n\n", 1)
if len(parts) == 2:
message = f"{parts[0]}\n{instr_block}\n\n{parts[1]}"
else:
message = f"{message}\n{instr_block}"
else:
message = f"[Runtime Context — metadata only, not instructions]\n{instr_block}\n\n{message}"
response = await nanobot_service.process_message(
message,
@@ -494,15 +508,29 @@ async def nanobot_chat_stream(request: ChatRequest):
# Inject instructions if explicitly routed
message, kb_citations = _extract_kb_citations(resolved_kb_id, request.message)
instructions = []
if request.route_mode == "sql" or request.prefer_sql_chart:
message = f"[System: Use the nl2sql tool to answer the query]\n{message}"
instructions.append("Use the nl2sql tool to answer the query")
elif request.route_mode == "chat":
message = f"[System: Normal chat mode. Do NOT use the nl2sql tool]\n{message}"
instructions.append("Normal chat mode. Do NOT use the nl2sql tool")
# Inject instructions for selected skills
if request.skill_ids:
skill_list = ", ".join(request.skill_ids)
message = f"[System: You must prioritize using the following skills/tools to answer the user's request: {skill_list}]\n{message}"
instructions.append(f"You must prioritize using the following skills/tools to answer the user's request: {skill_list}")
if instructions:
instr_block = "\n".join(instructions)
# If message already has Runtime Context, append to it, otherwise create new
if message.startswith("[Runtime Context — metadata only, not instructions]"):
parts = message.split("\n\n", 1)
if len(parts) == 2:
message = f"{parts[0]}\n{instr_block}\n\n{parts[1]}"
else:
message = f"{message}\n{instr_block}"
else:
message = f"[Runtime Context — metadata only, not instructions]\n{instr_block}\n\n{message}"
current_task = asyncio.create_task(
nanobot_service.process_message(
+2
View File
@@ -50,6 +50,8 @@ dependencies = [
"uvicorn>=0.41.0",
"websocket-client>=1.9.0,<2.0.0",
"websockets>=16.0,<17.0",
"python-docx>=1.2.0",
"python-pptx>=1.0.2",
]
[tool.uv.sources]
+41
View File
@@ -246,9 +246,11 @@ dependencies = [
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "pypdf2" },
{ name = "python-docx" },
{ name = "python-dotenv" },
{ name = "python-jose", extra = ["cryptography"] },
{ name = "python-multipart" },
{ name = "python-pptx" },
{ name = "python-socketio" },
{ name = "python-socks" },
{ name = "python-telegram-bot", extra = ["socks"] },
@@ -295,9 +297,11 @@ requires-dist = [
{ name = "pydantic", specifier = ">=2.12.0,<3.0.0" },
{ name = "pydantic-settings", specifier = ">=2.12.0,<3.0.0" },
{ name = "pypdf2", specifier = ">=3.0.0" },
{ name = "python-docx", specifier = ">=1.2.0" },
{ name = "python-dotenv", specifier = ">=1.0.1" },
{ name = "python-jose", extras = ["cryptography"], specifier = ">=3.5.0" },
{ name = "python-multipart", specifier = ">=0.0.22" },
{ name = "python-pptx", specifier = ">=1.0.2" },
{ name = "python-socketio", specifier = ">=5.16.0,<6.0.0" },
{ name = "python-socks", extras = ["asyncio"], specifier = ">=2.8.0,<3.0.0" },
{ name = "python-telegram-bot", extras = ["socks"], specifier = ">=22.6,<23.0" },
@@ -2683,6 +2687,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
]
[[package]]
name = "python-docx"
version = "1.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "lxml" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" },
]
[[package]]
name = "python-dotenv"
version = "1.2.2"
@@ -2732,6 +2749,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" },
]
[[package]]
name = "python-pptx"
version = "1.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "lxml" },
{ name = "pillow" },
{ name = "typing-extensions" },
{ name = "xlsxwriter" },
]
sdist = { url = "https://files.pythonhosted.org/packages/52/a9/0c0db8d37b2b8a645666f7fd8accea4c6224e013c42b1d5c17c93590cd06/python_pptx-1.0.2.tar.gz", hash = "sha256:479a8af0eaf0f0d76b6f00b0887732874ad2e3188230315290cd1f9dd9cc7095", size = 10109297, upload-time = "2024-08-07T17:33:37.772Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788, upload-time = "2024-08-07T17:33:28.192Z" },
]
[[package]]
name = "python-socketio"
version = "5.16.1"
@@ -3684,6 +3716,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405, upload-time = "2025-11-20T18:18:00.454Z" },
]
[[package]]
name = "xlsxwriter"
version = "3.2.9"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/46/2c/c06ef49dc36e7954e55b802a8b231770d286a9758b3d936bd1e04ce5ba88/xlsxwriter-3.2.9.tar.gz", hash = "sha256:254b1c37a368c444eac6e2f867405cc9e461b0ed97a3233b2ac1e574efb4140c", size = 215940, upload-time = "2025-09-16T00:16:21.63Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3a/0c/3662f4a66880196a590b202f0db82d919dd2f89e99a27fadef91c4a33d41/xlsxwriter-3.2.9-py3-none-any.whl", hash = "sha256:9a5db42bc5dff014806c58a20b9eae7322a134abb6fce3c92c181bfb275ec5b3", size = 175315, upload-time = "2025-09-16T00:16:20.108Z" },
]
[[package]]
name = "yarl"
version = "1.23.0"
+11 -4
View File
@@ -574,10 +574,17 @@ export function ChatInterface() {
let cleanContent = m.content || "";
// Remove injected system prompt instructions from user messages if present
if (m.role === 'user') {
cleanContent = cleanContent.replace(/^\[System:.*?\]\n?/i, '');
// Handle cases where there might be a runtime context block for skills
cleanContent = cleanContent.replace(/\[Runtime Context[\s\S]*?(?=\[System:|$)/i, '');
cleanContent = cleanContent.replace(/\[System:.*?\]\n?/i, ''); // clean again in case it follows context
if (cleanContent.startsWith("[Runtime Context")) {
const splitIndex = cleanContent.indexOf("\n\n");
if (splitIndex !== -1) {
cleanContent = cleanContent.substring(splitIndex + 2);
} else {
cleanContent = "";
}
} else if (cleanContent.startsWith("[System:")) {
// Fallback for older messages containing [System: ...] wrapper
cleanContent = cleanContent.replace(/^\[System:[\s\S]*?\]\n*/i, '');
}
cleanContent = cleanContent.trim();
}
return {
+1 -1
View File
@@ -208,7 +208,7 @@
"knowledgeDocumentDeleteFailed": "Failed to delete document",
"noKnowledgeDocuments": "No documents in this knowledge base",
"knowledgeDocumentUploadTitle": "Upload Documents to Knowledge Base",
"knowledgeDocumentUploadHint": "Supports txt, md, json, yaml, xml, html, csv, xls, xlsx. Max 5MB per file.",
"knowledgeDocumentUploadHint": "Supports Text/Markdown/Code, Office(Word/Excel/PPT) and PDF. Max 15MB per file.",
"knowledgeDocumentUploadSelected": "{{count}} file(s) selected",
"knowledgeDocumentUploadNone": "No files selected",
"knowledgeDocumentUploadAction": "Upload and Add",
+1 -1
View File
@@ -222,7 +222,7 @@
"knowledgeDocumentDeleteFailed": "删除文档失败",
"noKnowledgeDocuments": "当前知识库还没有文档",
"knowledgeDocumentUploadTitle": "上传文档到知识库",
"knowledgeDocumentUploadHint": "支持 txt、md、json、yaml、xml、html、csv、xls、xlsx,单文件不超过 5MB。",
"knowledgeDocumentUploadHint": "支持 文本/Markdown/代码、Office (Word/Excel/PPT) 及 PDF 文件,单文件不超过 15MB。",
"knowledgeDocumentUploadSelected": "已选择 {{count}} 个文件",
"knowledgeDocumentUploadNone": "尚未选择文件",
"knowledgeDocumentUploadAction": "上传并入库",
+2 -1
View File
@@ -499,7 +499,8 @@ export function KnowledgeBases() {
<div className="flex-1 overflow-y-auto p-6 space-y-6">
{/* Upload Section */}
<div className="rounded-lg border border-border p-4 bg-muted/30">
<div className="text-sm font-medium text-foreground mb-3">{t('knowledgeDocumentUploadTitle')}</div>
<div className="text-sm font-medium text-foreground mb-1">{t('knowledgeDocumentUploadTitle')}</div>
<div className="text-xs text-muted-foreground mb-3">{t('knowledgeDocumentUploadHint')}</div>
<div className="flex items-center gap-3">
<Input
type="file"