diff --git a/dbgpt/rag/knowledge/datasource.py b/dbgpt/rag/knowledge/datasource.py
index cbafe3d41..78ae045dd 100644
--- a/dbgpt/rag/knowledge/datasource.py
+++ b/dbgpt/rag/knowledge/datasource.py
@@ -52,6 +52,7 @@ class DatasourceKnowledge(Knowledge):
         return [
             ChunkStrategy.CHUNK_BY_SIZE,
             ChunkStrategy.CHUNK_BY_SEPARATOR,
+            ChunkStrategy.CHUNK_BY_PAGE,
         ]
 
     @classmethod
@@ -63,3 +64,12 @@ class DatasourceKnowledge(Knowledge):
     def document_type(cls) -> DocumentType:
         """Return document type."""
         return DocumentType.DATASOURCE
+
+    @classmethod
+    def default_chunk_strategy(cls) -> ChunkStrategy:
+        """Return default chunk strategy.
+
+        Returns:
+            ChunkStrategy: default chunk strategy
+        """
+        return ChunkStrategy.CHUNK_BY_PAGE
diff --git a/dbgpt/rag/summary/db_summary_client.py b/dbgpt/rag/summary/db_summary_client.py
index 29eb0e046..e77560477 100644
--- a/dbgpt/rag/summary/db_summary_client.py
+++ b/dbgpt/rag/summary/db_summary_client.py
@@ -101,8 +101,10 @@ class DBSummaryClient:
             from dbgpt.rag.assembler.db_schema import DBSchemaAssembler
 
             db_assembler = DBSchemaAssembler.load_from_connection(
-                connector=db_summary_client.db, vector_store_connector=vector_connector
+                connector=db_summary_client.db,
+                vector_store_connector=vector_connector,
             )
+
             if len(db_assembler.get_chunks()) > 0:
                 db_assembler.persist()
             else: