mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-17 15:10:14 +00:00
fix:url embedding
This commit is contained in:
@@ -53,9 +53,11 @@ class BaseOutputParser(ABC):
|
||||
"""
|
||||
if data["error_code"] == 0:
|
||||
if "vicuna" in CFG.LLM_MODEL:
|
||||
output = data["text"][skip_echo_len + 11:].strip()
|
||||
# output = data["text"][skip_echo_len + 11:].strip()
|
||||
output = data["text"][skip_echo_len:].strip()
|
||||
elif "guanaco" in CFG.LLM_MODEL:
|
||||
output = data["text"][skip_echo_len + 14:].replace("<s>", "").strip()
|
||||
# output = data["text"][skip_echo_len + 14:].replace("<s>", "").strip()
|
||||
output = data["text"][skip_echo_len:].replace("<s>", "").strip()
|
||||
else:
|
||||
output = data["text"].strip()
|
||||
|
||||
|
@@ -11,7 +11,7 @@ from pilot.scene.chat_normal.out_parser import NormalChatOutputParser
|
||||
|
||||
CFG = Config()
|
||||
|
||||
PROMPT_SCENE_DEFINE = """A chat between a curious user and an artificial intelligence assistant, who very familiar with database related knowledge.
|
||||
PROMPT_SCENE_DEFINE = """A chat between a curious human and an artificial intelligence assistant, who very familiar with database related knowledge.
|
||||
The assistant gives helpful, detailed, professional and polite answers to the user's questions. """
|
||||
|
||||
|
||||
|
@@ -5,9 +5,12 @@ from langchain.document_loaders import WebBaseLoader
|
||||
from langchain.schema import Document
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
|
||||
|
||||
|
||||
CFG = Config()
|
||||
class URLEmbedding(SourceEmbedding):
|
||||
"""url embedding for read url document."""
|
||||
|
||||
@@ -22,10 +25,15 @@ class URLEmbedding(SourceEmbedding):
|
||||
def read(self):
    """Load the document at ``self.file_path`` and split it into chunks.

    The splitter is chosen from ``CFG.LANGUAGE``:

    * ``"en"`` -- a ``CharacterTextSplitter`` whose chunk size is
      ``KNOWLEDGE_CHUNK_SPLIT_SIZE``, with an overlap of 20 and ``len`` as
      the length function.
    * anything else -- a ``CHNDocumentSplitter`` built with ``pdf=True`` and
      ``sentence_size=1000``.

    Returns:
        Whatever ``loader.load_and_split`` produces for the chosen splitter.
    """
    loader = WebBaseLoader(web_path=self.file_path)

    # A fixed 100-character splitter used to be built and returned here,
    # which made the language-dependent selection below unreachable. Only
    # the configurable path is kept so the selection actually takes effect.
    if CFG.LANGUAGE == "en":
        text_splitter = CharacterTextSplitter(
            chunk_size=KNOWLEDGE_CHUNK_SPLIT_SIZE,
            chunk_overlap=20,
            length_function=len,
        )
    else:
        text_splitter = CHNDocumentSplitter(pdf=True, sentence_size=1000)

    return loader.load_and_split(text_splitter)
|
||||
|
||||
@register
|
||||
def data_process(self, documents: List[Document]):
|
||||
|
Reference in New Issue
Block a user