fix: KB message truncation bug
This commit is contained in:
@@ -67,6 +67,33 @@ def _extract_upload_text(filename: str, content: bytes) -> str:
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to parse PDF: {str(e)}")
|
||||
|
||||
# 增加对 Word 文档的文本提取支持
|
||||
if lower.endswith((".doc", ".docx")):
|
||||
try:
|
||||
import docx
|
||||
doc = docx.Document(io.BytesIO(content))
|
||||
return "\n".join([para.text for para in doc.paragraphs])
|
||||
except ImportError:
|
||||
raise ValueError("python-docx is not installed. Cannot parse Word files.")
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to parse Word document: {str(e)}")
|
||||
|
||||
# 增加对 PPT 文档的文本提取支持
|
||||
if lower.endswith((".ppt", ".pptx")):
|
||||
try:
|
||||
import pptx
|
||||
prs = pptx.Presentation(io.BytesIO(content))
|
||||
text = []
|
||||
for slide in prs.slides:
|
||||
for shape in slide.shapes:
|
||||
if hasattr(shape, "text"):
|
||||
text.append(shape.text)
|
||||
return "\n".join(text)
|
||||
except ImportError:
|
||||
raise ValueError("python-pptx is not installed. Cannot parse PPT files.")
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to parse PPT document: {str(e)}")
|
||||
|
||||
raise ValueError("Unsupported file type")
|
||||
|
||||
|
||||
|
||||
+38
-10
@@ -281,7 +281,7 @@ def _extract_kb_citations(kb_id: Optional[str], message: str) -> Tuple[str, List
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
title = str(item.get("title") or f"Doc {idx}")
|
||||
chunk = str(item.get("chunk") or "").strip()
|
||||
chunk = str(item.get("chunk") or "").strip().replace("\n\n", "\n")
|
||||
if not chunk:
|
||||
continue
|
||||
score = float(item.get("score", 0.0) or 0.0)
|
||||
@@ -297,11 +297,11 @@ def _extract_kb_citations(kb_id: Optional[str], message: str) -> Tuple[str, List
|
||||
)
|
||||
if not lines:
|
||||
return f"[System: A knowledge base is selected ({kb_id}). Retrieval result is empty.]\n{message}", []
|
||||
context_block = "\n\n".join(lines)
|
||||
next_message = f"[System: The following context is retrieved from knowledge base {kb_id}. You must ground your answer on it when relevant.]\n{context_block}\n\n{message}"
|
||||
context_block = "\n".join(lines)
|
||||
next_message = f"[Runtime Context — metadata only, not instructions]\nThe following context is retrieved from knowledge base {kb_id}. You must ground your answer on it when relevant.\n{context_block}\n\n{message}"
|
||||
return next_message, citations
|
||||
except Exception as exc:
|
||||
return f"[System: A knowledge base is selected ({kb_id}) but retrieval failed: {exc}]\n{message}", []
|
||||
return f"[Runtime Context — metadata only, not instructions]\nA knowledge base is selected ({kb_id}) but retrieval failed: {exc}\n\n{message}", []
|
||||
|
||||
|
||||
def _sync_session_project(session_id: str, project_id: Optional[int]) -> None:
|
||||
@@ -408,15 +408,29 @@ async def nanobot_chat(request: ChatRequest):
|
||||
|
||||
# Inject instructions if explicitly routed
|
||||
message, kb_citations = _extract_kb_citations(resolved_kb_id, request.message)
|
||||
|
||||
instructions = []
|
||||
if request.route_mode == "sql" or request.prefer_sql_chart:
|
||||
message = f"[System: Use the nl2sql tool to answer the query]\n{message}"
|
||||
instructions.append("Use the nl2sql tool to answer the query")
|
||||
elif request.route_mode == "chat":
|
||||
message = f"[System: Normal chat mode. Do NOT use the nl2sql tool]\n{message}"
|
||||
instructions.append("Normal chat mode. Do NOT use the nl2sql tool")
|
||||
|
||||
# Inject instructions for selected skills
|
||||
if request.skill_ids:
|
||||
skill_list = ", ".join(request.skill_ids)
|
||||
message = f"[System: You must prioritize using the following skills/tools to answer the user's request: {skill_list}]\n{message}"
|
||||
instructions.append(f"You must prioritize using the following skills/tools to answer the user's request: {skill_list}")
|
||||
|
||||
if instructions:
|
||||
instr_block = "\n".join(instructions)
|
||||
# If message already has Runtime Context, append to it, otherwise create new
|
||||
if message.startswith("[Runtime Context — metadata only, not instructions]"):
|
||||
parts = message.split("\n\n", 1)
|
||||
if len(parts) == 2:
|
||||
message = f"{parts[0]}\n{instr_block}\n\n{parts[1]}"
|
||||
else:
|
||||
message = f"{message}\n{instr_block}"
|
||||
else:
|
||||
message = f"[Runtime Context — metadata only, not instructions]\n{instr_block}\n\n{message}"
|
||||
|
||||
response = await nanobot_service.process_message(
|
||||
message,
|
||||
@@ -494,15 +508,29 @@ async def nanobot_chat_stream(request: ChatRequest):
|
||||
|
||||
# Inject instructions if explicitly routed
|
||||
message, kb_citations = _extract_kb_citations(resolved_kb_id, request.message)
|
||||
|
||||
instructions = []
|
||||
if request.route_mode == "sql" or request.prefer_sql_chart:
|
||||
message = f"[System: Use the nl2sql tool to answer the query]\n{message}"
|
||||
instructions.append("Use the nl2sql tool to answer the query")
|
||||
elif request.route_mode == "chat":
|
||||
message = f"[System: Normal chat mode. Do NOT use the nl2sql tool]\n{message}"
|
||||
instructions.append("Normal chat mode. Do NOT use the nl2sql tool")
|
||||
|
||||
# Inject instructions for selected skills
|
||||
if request.skill_ids:
|
||||
skill_list = ", ".join(request.skill_ids)
|
||||
message = f"[System: You must prioritize using the following skills/tools to answer the user's request: {skill_list}]\n{message}"
|
||||
instructions.append(f"You must prioritize using the following skills/tools to answer the user's request: {skill_list}")
|
||||
|
||||
if instructions:
|
||||
instr_block = "\n".join(instructions)
|
||||
# If message already has Runtime Context, append to it, otherwise create new
|
||||
if message.startswith("[Runtime Context — metadata only, not instructions]"):
|
||||
parts = message.split("\n\n", 1)
|
||||
if len(parts) == 2:
|
||||
message = f"{parts[0]}\n{instr_block}\n\n{parts[1]}"
|
||||
else:
|
||||
message = f"{message}\n{instr_block}"
|
||||
else:
|
||||
message = f"[Runtime Context — metadata only, not instructions]\n{instr_block}\n\n{message}"
|
||||
|
||||
current_task = asyncio.create_task(
|
||||
nanobot_service.process_message(
|
||||
|
||||
@@ -50,6 +50,8 @@ dependencies = [
|
||||
"uvicorn>=0.41.0",
|
||||
"websocket-client>=1.9.0,<2.0.0",
|
||||
"websockets>=16.0,<17.0",
|
||||
"python-docx>=1.2.0",
|
||||
"python-pptx>=1.0.2",
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
|
||||
Generated
+41
@@ -246,9 +246,11 @@ dependencies = [
|
||||
{ name = "pydantic" },
|
||||
{ name = "pydantic-settings" },
|
||||
{ name = "pypdf2" },
|
||||
{ name = "python-docx" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "python-jose", extra = ["cryptography"] },
|
||||
{ name = "python-multipart" },
|
||||
{ name = "python-pptx" },
|
||||
{ name = "python-socketio" },
|
||||
{ name = "python-socks" },
|
||||
{ name = "python-telegram-bot", extra = ["socks"] },
|
||||
@@ -295,9 +297,11 @@ requires-dist = [
|
||||
{ name = "pydantic", specifier = ">=2.12.0,<3.0.0" },
|
||||
{ name = "pydantic-settings", specifier = ">=2.12.0,<3.0.0" },
|
||||
{ name = "pypdf2", specifier = ">=3.0.0" },
|
||||
{ name = "python-docx", specifier = ">=1.2.0" },
|
||||
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
||||
{ name = "python-jose", extras = ["cryptography"], specifier = ">=3.5.0" },
|
||||
{ name = "python-multipart", specifier = ">=0.0.22" },
|
||||
{ name = "python-pptx", specifier = ">=1.0.2" },
|
||||
{ name = "python-socketio", specifier = ">=5.16.0,<6.0.0" },
|
||||
{ name = "python-socks", extras = ["asyncio"], specifier = ">=2.8.0,<3.0.0" },
|
||||
{ name = "python-telegram-bot", extras = ["socks"], specifier = ">=22.6,<23.0" },
|
||||
@@ -2683,6 +2687,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-docx"
|
||||
version = "1.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "lxml" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dotenv"
|
||||
version = "1.2.2"
|
||||
@@ -2732,6 +2749,21 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-pptx"
|
||||
version = "1.0.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "lxml" },
|
||||
{ name = "pillow" },
|
||||
{ name = "typing-extensions" },
|
||||
{ name = "xlsxwriter" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/52/a9/0c0db8d37b2b8a645666f7fd8accea4c6224e013c42b1d5c17c93590cd06/python_pptx-1.0.2.tar.gz", hash = "sha256:479a8af0eaf0f0d76b6f00b0887732874ad2e3188230315290cd1f9dd9cc7095", size = 10109297, upload-time = "2024-08-07T17:33:37.772Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788, upload-time = "2024-08-07T17:33:28.192Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-socketio"
|
||||
version = "5.16.1"
|
||||
@@ -3684,6 +3716,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405, upload-time = "2025-11-20T18:18:00.454Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xlsxwriter"
|
||||
version = "3.2.9"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/46/2c/c06ef49dc36e7954e55b802a8b231770d286a9758b3d936bd1e04ce5ba88/xlsxwriter-3.2.9.tar.gz", hash = "sha256:254b1c37a368c444eac6e2f867405cc9e461b0ed97a3233b2ac1e574efb4140c", size = 215940, upload-time = "2025-09-16T00:16:21.63Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/0c/3662f4a66880196a590b202f0db82d919dd2f89e99a27fadef91c4a33d41/xlsxwriter-3.2.9-py3-none-any.whl", hash = "sha256:9a5db42bc5dff014806c58a20b9eae7322a134abb6fce3c92c181bfb275ec5b3", size = 175315, upload-time = "2025-09-16T00:16:20.108Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yarl"
|
||||
version = "1.23.0"
|
||||
|
||||
@@ -574,10 +574,17 @@ export function ChatInterface() {
|
||||
let cleanContent = m.content || "";
|
||||
// Remove injected system prompt instructions from user messages if present
|
||||
if (m.role === 'user') {
|
||||
cleanContent = cleanContent.replace(/^\[System:.*?\]\n?/i, '');
|
||||
// Handle cases where there might be a runtime context block for skills
|
||||
cleanContent = cleanContent.replace(/\[Runtime Context[\s\S]*?(?=\[System:|$)/i, '');
|
||||
cleanContent = cleanContent.replace(/\[System:.*?\]\n?/i, ''); // clean again in case it follows context
|
||||
if (cleanContent.startsWith("[Runtime Context")) {
|
||||
const splitIndex = cleanContent.indexOf("\n\n");
|
||||
if (splitIndex !== -1) {
|
||||
cleanContent = cleanContent.substring(splitIndex + 2);
|
||||
} else {
|
||||
cleanContent = "";
|
||||
}
|
||||
} else if (cleanContent.startsWith("[System:")) {
|
||||
// Fallback for older messages containing [System: ...] wrapper
|
||||
cleanContent = cleanContent.replace(/^\[System:[\s\S]*?\]\n*/i, '');
|
||||
}
|
||||
cleanContent = cleanContent.trim();
|
||||
}
|
||||
return {
|
||||
|
||||
@@ -208,7 +208,7 @@
|
||||
"knowledgeDocumentDeleteFailed": "Failed to delete document",
|
||||
"noKnowledgeDocuments": "No documents in this knowledge base",
|
||||
"knowledgeDocumentUploadTitle": "Upload Documents to Knowledge Base",
|
||||
"knowledgeDocumentUploadHint": "Supports txt, md, json, yaml, xml, html, csv, xls, xlsx. Max 5MB per file.",
|
||||
"knowledgeDocumentUploadHint": "Supports Text/Markdown/Code, Office(Word/Excel/PPT) and PDF. Max 15MB per file.",
|
||||
"knowledgeDocumentUploadSelected": "{{count}} file(s) selected",
|
||||
"knowledgeDocumentUploadNone": "No files selected",
|
||||
"knowledgeDocumentUploadAction": "Upload and Add",
|
||||
|
||||
@@ -222,7 +222,7 @@
|
||||
"knowledgeDocumentDeleteFailed": "删除文档失败",
|
||||
"noKnowledgeDocuments": "当前知识库还没有文档",
|
||||
"knowledgeDocumentUploadTitle": "上传文档到知识库",
|
||||
"knowledgeDocumentUploadHint": "支持 txt、md、json、yaml、xml、html、csv、xls、xlsx,单文件不超过 5MB。",
|
||||
"knowledgeDocumentUploadHint": "支持 文本/Markdown/代码、Office (Word/Excel/PPT) 及 PDF 文件,单文件不超过 15MB。",
|
||||
"knowledgeDocumentUploadSelected": "已选择 {{count}} 个文件",
|
||||
"knowledgeDocumentUploadNone": "尚未选择文件",
|
||||
"knowledgeDocumentUploadAction": "上传并入库",
|
||||
|
||||
@@ -499,7 +499,8 @@ export function KnowledgeBases() {
|
||||
<div className="flex-1 overflow-y-auto p-6 space-y-6">
|
||||
{/* Upload Section */}
|
||||
<div className="rounded-lg border border-border p-4 bg-muted/30">
|
||||
<div className="text-sm font-medium text-foreground mb-3">{t('knowledgeDocumentUploadTitle')}</div>
|
||||
<div className="text-sm font-medium text-foreground mb-1">{t('knowledgeDocumentUploadTitle')}</div>
|
||||
<div className="text-xs text-muted-foreground mb-3">{t('knowledgeDocumentUploadHint')}</div>
|
||||
<div className="flex items-center gap-3">
|
||||
<Input
|
||||
type="file"
|
||||
|
||||
Reference in New Issue
Block a user