mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-15 05:59:59 +00:00
feat(rag): Support RAG SDK (#1322)
This commit is contained in:
@@ -1,23 +1,50 @@
|
||||
"""Module Of Knowledge."""
|
||||
|
||||
from .base import ChunkStrategy, Knowledge, KnowledgeType # noqa: F401
|
||||
from .csv import CSVKnowledge # noqa: F401
|
||||
from .docx import DocxKnowledge # noqa: F401
|
||||
from .factory import KnowledgeFactory # noqa: F401
|
||||
from .html import HTMLKnowledge # noqa: F401
|
||||
from .markdown import MarkdownKnowledge # noqa: F401
|
||||
from .pdf import PDFKnowledge # noqa: F401
|
||||
from .pptx import PPTXKnowledge # noqa: F401
|
||||
from .string import StringKnowledge # noqa: F401
|
||||
from .txt import TXTKnowledge # noqa: F401
|
||||
from .url import URLKnowledge # noqa: F401
|
||||
from typing import Any, Dict
|
||||
|
||||
__ALL__ = [
|
||||
# Exports that have already been resolved, keyed by their public name.
_MODULE_CACHE: Dict[str, Any] = {}


def __getattr__(name: str):
    """Resolve a public export of this package on first access.

    The export is looked up in a name-to-submodule table, the submodule is
    imported relative to this package, and the attribute with the same name
    is fetched from it. Resolved attributes are stored in ``_MODULE_CACHE``
    and returned from there on later calls.

    Args:
        name(str): attribute name requested on this module

    Returns:
        The attribute called ``name`` from the mapped submodule.

    Raises:
        AttributeError: if ``name`` is not one of the lazily exported names.
    """
    # Imported here rather than at module level, as in the original lazy load.
    from importlib import import_module

    try:
        return _MODULE_CACHE[name]
    except KeyError:
        pass

    # Public name -> submodule (relative to this package) that defines it.
    submodule_by_export = {
        "KnowledgeFactory": "factory",
        "Knowledge": "base",
        "KnowledgeType": "base",
        "ChunkStrategy": "base",
        "CSVKnowledge": "csv",
        "DatasourceKnowledge": "datasource",
        "DocxKnowledge": "docx",
        "HTMLKnowledge": "html",
        "MarkdownKnowledge": "markdown",
        "PDFKnowledge": "pdf",
        "PPTXKnowledge": "pptx",
        "StringKnowledge": "string",
        "TXTKnowledge": "txt",
        "URLKnowledge": "url",
    }

    submodule = submodule_by_export.get(name)
    if submodule is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    exported = getattr(import_module(f".{submodule}", __name__), name)
    _MODULE_CACHE[name] = exported
    return exported
|
||||
|
||||
|
||||
__all__ = [
|
||||
"KnowledgeFactory",
|
||||
"Knowledge",
|
||||
"KnowledgeType",
|
||||
"ChunkStrategy",
|
||||
"CSVKnowledge",
|
||||
"DatasourceKnowledge",
|
||||
"DocxKnowledge",
|
||||
"HTMLKnowledge",
|
||||
"MarkdownKnowledge",
|
||||
|
@@ -25,6 +25,7 @@ class DocumentType(Enum):
|
||||
DOCX = "docx"
|
||||
TXT = "txt"
|
||||
HTML = "html"
|
||||
DATASOURCE = "datasource"
|
||||
|
||||
|
||||
class KnowledgeType(Enum):
|
||||
|
57
dbgpt/rag/knowledge/datasource.py
Normal file
57
dbgpt/rag/knowledge/datasource.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""Datasource Knowledge."""
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.core import Document
|
||||
from dbgpt.datasource import BaseConnector
|
||||
|
||||
from ..summary.rdbms_db_summary import _parse_db_summary
|
||||
from .base import ChunkStrategy, DocumentType, Knowledge, KnowledgeType
|
||||
|
||||
|
||||
class DatasourceKnowledge(Knowledge):
    """Datasource Knowledge.

    Wraps a datasource connector and exposes the summaries produced by
    ``_parse_db_summary`` as documents.
    """

    def __init__(
        self,
        connector: BaseConnector,
        summary_template: str = "{table_name}({columns})",
        knowledge_type: Optional[KnowledgeType] = KnowledgeType.DOCUMENT,
        **kwargs: Any,
    ) -> None:
        """Create Datasource Knowledge with Knowledge arguments.

        Args:
            connector(BaseConnector): connector of the datasource to summarize
            summary_template(str, optional): template handed to
                ``_parse_db_summary`` together with the connector
            knowledge_type(KnowledgeType, optional): knowledge type
            **kwargs: extra keyword arguments forwarded to the ``Knowledge``
                constructor
        """
        self._connector = connector
        self._summary_template = summary_template
        super().__init__(knowledge_type=knowledge_type, **kwargs)

    def _load(self) -> List[Document]:
        """Load datasource documents from the connector.

        Returns:
            List[Document]: one document per summary returned by
                ``_parse_db_summary``, each tagged with source ``database``.
        """
        return [
            Document(content=table_summary, metadata={"source": "database"})
            for table_summary in _parse_db_summary(
                self._connector, self._summary_template
            )
        ]

    @classmethod
    def support_chunk_strategy(cls) -> List[ChunkStrategy]:
        """Return support chunk strategy."""
        return [
            ChunkStrategy.CHUNK_BY_SIZE,
            ChunkStrategy.CHUNK_BY_SEPARATOR,
        ]

    @classmethod
    def type(cls) -> KnowledgeType:
        """Knowledge type of Datasource."""
        return KnowledgeType.DOCUMENT

    @classmethod
    def document_type(cls) -> DocumentType:
        """Return document type."""
        return DocumentType.DATASOURCE
|
@@ -156,6 +156,7 @@ class KnowledgeFactory:
|
||||
"""Get all knowledge subclasses."""
|
||||
from dbgpt.rag.knowledge.base import Knowledge # noqa: F401
|
||||
from dbgpt.rag.knowledge.csv import CSVKnowledge # noqa: F401
|
||||
from dbgpt.rag.knowledge.datasource import DatasourceKnowledge # noqa: F401
|
||||
from dbgpt.rag.knowledge.docx import DocxKnowledge # noqa: F401
|
||||
from dbgpt.rag.knowledge.html import HTMLKnowledge # noqa: F401
|
||||
from dbgpt.rag.knowledge.markdown import MarkdownKnowledge # noqa: F401
|
||||
|
Reference in New Issue
Block a user