FIX:1598 Use PageTextSplitter for DatasourceKnowledge (#1599)

Co-authored-by: shenk-b <shenk-b@glodon.com>
Co-authored-by: aries_ckt <916701291@qq.com>
This commit is contained in:
Kevin.Shin 2024-06-06 13:45:50 +08:00 committed by GitHub
parent 59b7aa790b
commit 20e7ccc831
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 13 additions and 1 deletions

View File

@ -52,6 +52,7 @@ class DatasourceKnowledge(Knowledge):
return [
ChunkStrategy.CHUNK_BY_SIZE,
ChunkStrategy.CHUNK_BY_SEPARATOR,
ChunkStrategy.CHUNK_BY_PAGE,
]
@classmethod
@ -63,3 +64,12 @@ class DatasourceKnowledge(Knowledge):
def document_type(cls) -> DocumentType:
    """Return the document type for this knowledge class.

    Returns:
        DocumentType: always ``DocumentType.DATASOURCE``.
    """
    doc_type = DocumentType.DATASOURCE
    return doc_type
@classmethod
def default_chunk_strategy(cls) -> ChunkStrategy:
    """Return the default chunk strategy for this knowledge class.

    Returns:
        ChunkStrategy: always ``ChunkStrategy.CHUNK_BY_PAGE``.
    """
    strategy = ChunkStrategy.CHUNK_BY_PAGE
    return strategy

View File

@ -101,8 +101,10 @@ class DBSummaryClient:
from dbgpt.rag.assembler.db_schema import DBSchemaAssembler
db_assembler = DBSchemaAssembler.load_from_connection(
connector=db_summary_client.db, vector_store_connector=vector_connector
connector=db_summary_client.db,
vector_store_connector=vector_connector,
)
if len(db_assembler.get_chunks()) > 0:
db_assembler.persist()
else: