mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-29 21:40:44 +00:00
update:knowledge env
This commit is contained in:
@@ -8,20 +8,21 @@ from bs4 import BeautifulSoup
|
||||
from langchain.document_loaders import TextLoader
|
||||
from langchain.schema import Document
|
||||
|
||||
from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
|
||||
from pilot.configs.config import Config
|
||||
from pilot.source_embedding import SourceEmbedding, register
|
||||
from pilot.source_embedding.EncodeTextLoader import EncodeTextLoader
|
||||
from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
|
||||
|
||||
CFG = Config()
|
||||
|
||||
|
||||
class MarkdownEmbedding(SourceEmbedding):
|
||||
"""markdown embedding for read markdown document."""
|
||||
|
||||
def __init__(self, file_path, model_name, vector_store_config):
|
||||
def __init__(self, file_path, vector_store_config):
|
||||
"""Initialize with markdown path."""
|
||||
super().__init__(file_path, model_name, vector_store_config)
|
||||
super().__init__(file_path, vector_store_config)
|
||||
self.file_path = file_path
|
||||
self.model_name = model_name
|
||||
self.vector_store_config = vector_store_config
|
||||
# self.encoding = encoding
|
||||
|
||||
@@ -30,7 +31,7 @@ class MarkdownEmbedding(SourceEmbedding):
|
||||
"""Load from markdown path."""
|
||||
loader = EncodeTextLoader(self.file_path)
|
||||
text_splitter = CHNDocumentSplitter(
|
||||
pdf=True, sentence_size=KNOWLEDGE_CHUNK_SPLIT_SIZE
|
||||
pdf=True, sentence_size=CFG.KNOWLEDGE_CHUNK_SIZE
|
||||
)
|
||||
return loader.load_and_split(text_splitter)
|
||||
|
||||
|
Reference in New Issue
Block a user