Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-08-14 06:26:18 +00:00)

Merge branch 'dev' into dbgpt_doc

Commit bc22987510
@@ -53,9 +53,11 @@ class BaseOutputParser(ABC):
         """
         if data["error_code"] == 0:
             if "vicuna" in CFG.LLM_MODEL:
-                output = data["text"][skip_echo_len + 11:].strip()
+                # output = data["text"][skip_echo_len + 11:].strip()
+                output = data["text"][skip_echo_len:].strip()
             elif "guanaco" in CFG.LLM_MODEL:
-                output = data["text"][skip_echo_len + 14:].replace("<s>", "").strip()
+                # output = data["text"][skip_echo_len + 14:].replace("<s>", "").strip()
+                output = data["text"][skip_echo_len:].replace("<s>", "").strip()
             else:
                 output = data["text"].strip()
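The vicuna and guanaco branches above now slice the model response at skip_echo_len alone, instead of adding a model-specific offset (+11, +14) that could cut into the answer. A minimal stand-alone sketch of that parsing step, assuming a made-up response payload; parse_model_response below is an illustrative helper, not the method in BaseOutputParser:

# Illustrative sketch of the parsing change above: strip the echoed prompt
# from the model output using skip_echo_len only, with no extra per-model
# offset such as +11 or +14.
def parse_model_response(data: dict, skip_echo_len: int, llm_model: str) -> str:
    if data["error_code"] != 0:
        return data.get("text", "")
    if "vicuna" in llm_model:
        return data["text"][skip_echo_len:].strip()
    elif "guanaco" in llm_model:
        return data["text"][skip_echo_len:].replace("<s>", "").strip()
    return data["text"].strip()


# The echoed prompt "USER: hi ASSISTANT: " is 20 characters long, so the
# answer starts exactly at skip_echo_len.
response = {"error_code": 0, "text": "USER: hi ASSISTANT: ok, here is the answer."}
print(parse_model_response(response, skip_echo_len=20, llm_model="vicuna-13b"))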
@@ -31,7 +31,7 @@ prompt = PromptTemplate(
     template_scene=ChatScene.ChatNewKnowledge.value,
     input_variables=["context", "question"],
     response_format=None,
-    template_define=None,
+    template_define=PROMPT_SCENE_DEFINE,
     template=_DEFAULT_TEMPLATE,
     stream_out=PROMPT_NEED_NEED_STREAM_OUT,
     output_parser=NormalChatOutputParser(
@@ -11,6 +11,10 @@ from pilot.scene.chat_normal.out_parser import NormalChatOutputParser
 
 CFG = Config()
 
+PROMPT_SCENE_DEFINE = """A chat between a curious user and an artificial intelligence assistant, who very familiar with database related knowledge.
+The assistant gives helpful, detailed, professional and polite answers to the user's questions. """
+
+
 _DEFAULT_TEMPLATE = """ 基于以下已知的信息, 专业、简要的回答用户的问题,
 如果无法从提供的内容中获取答案, 请说: "知识库中提供的内容不足以回答此问题" 禁止胡乱编造。
 已知内容:
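Each prompt file touched by this commit now passes the new PROMPT_SCENE_DEFINE as template_define instead of None; the hunks below repeat the same change for ChatKnowledge and ChatUrlKnowledge. A rough sketch of how a scene definition and a template with {context}/{question} placeholders can be combined at render time; ScenePrompt is an illustrative stand-in, not DB-GPT's actual PromptTemplate class:

from dataclasses import dataclass
from typing import Optional

# Illustrative stand-in for a scene prompt: an optional scene definition
# (system-style preamble) plus a task template with placeholders.
@dataclass
class ScenePrompt:
    template_define: Optional[str]
    template: str

    def build(self, context: str, question: str) -> str:
        body = self.template.format(context=context, question=question)
        # With template_define set (as this commit now does), the scene
        # description is placed before the task template.
        return f"{self.template_define}\n{body}" if self.template_define else body


scene_define = ("A chat between a curious user and an artificial intelligence "
                "assistant, familiar with database related knowledge.")
template = "Known content:\n{context}\nQuestion:\n{question}\n"
print(ScenePrompt(scene_define, template).build("DB-GPT supports local LLMs.", "What is DB-GPT?"))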
@@ -28,7 +32,7 @@ prompt = PromptTemplate(
     template_scene=ChatScene.ChatKnowledge.value,
     input_variables=["context", "question"],
     response_format=None,
-    template_define=None,
+    template_define=PROMPT_SCENE_DEFINE,
     template=_DEFAULT_TEMPLATE,
     stream_out=PROMPT_NEED_NEED_STREAM_OUT,
     output_parser=NormalChatOutputParser(
@@ -11,10 +11,21 @@ from pilot.scene.chat_normal.out_parser import NormalChatOutputParser
 
 CFG = Config()
 
-_DEFAULT_TEMPLATE = """ Based on the known information, provide professional and concise answers to the user's questions. If the answer cannot be obtained from the provided content, please say: 'The information provided in the knowledge base is not sufficient to answer this question.' Fabrication is prohibited.。
-known information:
+PROMPT_SCENE_DEFINE = """A chat between a curious human and an artificial intelligence assistant, who very familiar with database related knowledge.
+The assistant gives helpful, detailed, professional and polite answers to the user's questions. """
+
+
+# _DEFAULT_TEMPLATE = """ Based on the known information, provide professional and concise answers to the user's questions. If the answer cannot be obtained from the provided content, please say: 'The information provided in the knowledge base is not sufficient to answer this question.' Fabrication is prohibited.。
+# known information:
+# {context}
+# question:
+# {question}
+# """
+_DEFAULT_TEMPLATE = """ 基于以下已知的信息, 专业、简要的回答用户的问题,
+如果无法从提供的内容中获取答案, 请说: "知识库中提供的内容不足以回答此问题" 禁止胡乱编造。
+已知内容:
 {context}
-question:
+问题:
 {question}
 """
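The replacement Chinese _DEFAULT_TEMPLATE carries the same instruction as the commented-out English one: answer professionally and concisely from the known information, and if the answer is not in the provided content, reply that the knowledge base content is insufficient rather than fabricating; it ends with 已知内容 (known content): {context} and 问题 (question): {question}. The commit hard-codes the Chinese text and keeps the English version as a comment; a config-driven switch, mirroring the CFG.LANGUAGE check this same commit adds to URLEmbedding.read(), could choose between them. The sketch below is a suggestion only, with stand-in template strings, not what the commit does:

# Illustrative only: select the knowledge-scene template from a language
# setting instead of hard-coding one template and commenting out the other.
_EN_TEMPLATE = (
    "Based on the known information, provide a professional and concise answer. "
    "If the answer cannot be obtained from the provided content, say the "
    "knowledge base is insufficient; do not fabricate.\n"
    "Known content:\n{context}\nQuestion:\n{question}\n"
)
_ZH_TEMPLATE = (
    "基于以下已知的信息, 专业、简要的回答用户的问题,\n"
    '如果无法从提供的内容中获取答案, 请说: "知识库中提供的内容不足以回答此问题" 禁止胡乱编造。\n'
    "已知内容:\n{context}\n问题:\n{question}\n"
)

LANGUAGE = "zh"  # stand-in for CFG.LANGUAGE
_DEFAULT_TEMPLATE = _EN_TEMPLATE if LANGUAGE == "en" else _ZH_TEMPLATE
print(_DEFAULT_TEMPLATE.format(context="...", question="..."))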
@@ -27,7 +38,7 @@ prompt = PromptTemplate(
     template_scene=ChatScene.ChatUrlKnowledge.value,
     input_variables=["context", "question"],
     response_format=None,
-    template_define=None,
+    template_define=PROMPT_SCENE_DEFINE,
     template=_DEFAULT_TEMPLATE,
     stream_out=PROMPT_NEED_NEED_STREAM_OUT,
     output_parser=NormalChatOutputParser(
@@ -5,9 +5,12 @@ from langchain.document_loaders import WebBaseLoader
 from langchain.schema import Document
 from langchain.text_splitter import CharacterTextSplitter
 
+from pilot.configs.config import Config
+from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
 from pilot.source_embedding import SourceEmbedding, register
+from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
 
+CFG = Config()
 class URLEmbedding(SourceEmbedding):
     """url embedding for read url document."""
@@ -22,10 +25,15 @@ class URLEmbedding(SourceEmbedding):
     def read(self):
         """Load from url path."""
        loader = WebBaseLoader(web_path=self.file_path)
-        text_splitor = CharacterTextSplitter(
-            chunk_size=1000, chunk_overlap=20, length_function=len
-        )
-        return loader.load_and_split(text_splitor)
+        if CFG.LANGUAGE == "en":
+            text_splitter = CharacterTextSplitter(
+                chunk_size=KNOWLEDGE_CHUNK_SPLIT_SIZE, chunk_overlap=20, length_function=len
+            )
+        else:
+            text_splitter = CHNDocumentSplitter(
+                pdf=True, sentence_size=1000
+            )
+        return loader.load_and_split(text_splitter)
 
     @register
     def data_process(self, documents: List[Document]):
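The read() change selects the splitter from CFG.LANGUAGE: CharacterTextSplitter with the configurable KNOWLEDGE_CHUNK_SPLIT_SIZE for English, and the sentence-aware CHNDocumentSplitter otherwise. A self-contained sketch of the same selection pattern, assuming toy splitter functions rather than LangChain's CharacterTextSplitter or DB-GPT's CHNDocumentSplitter:

import re
from typing import Callable, List

# Toy stand-ins for the two splitters picked in read(); the real code uses
# langchain's CharacterTextSplitter and DB-GPT's CHNDocumentSplitter.
def english_splitter(chunk_size: int) -> Callable[[str], List[str]]:
    def split(text: str) -> List[str]:
        # Fixed-size character chunks, roughly what CharacterTextSplitter does.
        return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    return split

def chinese_splitter(sentence_size: int) -> Callable[[str], List[str]]:
    def split(text: str) -> List[str]:
        # Split after Chinese/Latin sentence punctuation, then cap piece length.
        pieces = [p for p in re.split(r"(?<=[。！？.!?])", text) if p]
        return [p[:sentence_size] for p in pieces]
    return split

def pick_splitter(language: str, chunk_size: int = 1000) -> Callable[[str], List[str]]:
    # Mirrors the CFG.LANGUAGE == "en" branch added to URLEmbedding.read().
    return english_splitter(chunk_size) if language == "en" else chinese_splitter(chunk_size)

print(pick_splitter("zh")("今天天气很好。我们出去散步吧!"))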