mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-09 20:28:07 +00:00
fix:knowledge_init path
1.knowledge_init path 2.url embedding chunk
This commit is contained in:
parent
8baa1d9464
commit
1868b228bc
@ -3,7 +3,7 @@ from typing import List
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from langchain.document_loaders import WebBaseLoader
|
from langchain.document_loaders import WebBaseLoader
|
||||||
from langchain.schema import Document
|
from langchain.schema import Document
|
||||||
from langchain.text_splitter import CharacterTextSplitter
|
from langchain.text_splitter import CharacterTextSplitter, SpacyTextSplitter
|
||||||
|
|
||||||
from pilot.configs.config import Config
|
from pilot.configs.config import Config
|
||||||
from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
|
from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
|
||||||
@ -33,7 +33,11 @@ class URLEmbedding(SourceEmbedding):
|
|||||||
length_function=len,
|
length_function=len,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
text_splitter = CHNDocumentSplitter(pdf=True, sentence_size=1000)
|
text_splitter = SpacyTextSplitter(
|
||||||
|
pipeline="zh_core_web_sm",
|
||||||
|
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE,
|
||||||
|
chunk_overlap=100,
|
||||||
|
)
|
||||||
return loader.load_and_split(text_splitter)
|
return loader.load_and_split(text_splitter)
|
||||||
|
|
||||||
@register
|
@register
|
||||||
|
@ -3,12 +3,12 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
|
||||||
|
|
||||||
from pilot.embedding_engine.knowledge_type import KnowledgeType
|
from pilot.embedding_engine.knowledge_type import KnowledgeType
|
||||||
from pilot.openapi.knowledge.knowledge_service import KnowledgeService
|
from pilot.openapi.knowledge.knowledge_service import KnowledgeService
|
||||||
from pilot.openapi.knowledge.request.knowledge_request import KnowledgeSpaceRequest
|
from pilot.openapi.knowledge.request.knowledge_request import KnowledgeSpaceRequest
|
||||||
|
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
|
|
||||||
|
|
||||||
from pilot.configs.config import Config
|
from pilot.configs.config import Config
|
||||||
from pilot.configs.model_config import (
|
from pilot.configs.model_config import (
|
||||||
|
Loading…
Reference in New Issue
Block a user