mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-15 05:59:59 +00:00
refactor: Refactor proxy LLM (#1064)
This commit is contained in:
@@ -1,14 +1,14 @@
|
||||
from abc import abstractmethod, ABC
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.text_splitter.text_splitter import (
|
||||
RecursiveCharacterTextSplitter,
|
||||
MarkdownHeaderTextSplitter,
|
||||
ParagraphTextSplitter,
|
||||
CharacterTextSplitter,
|
||||
MarkdownHeaderTextSplitter,
|
||||
PageTextSplitter,
|
||||
ParagraphTextSplitter,
|
||||
RecursiveCharacterTextSplitter,
|
||||
SeparatorTextSplitter,
|
||||
)
|
||||
|
||||
|
@@ -1,11 +1,12 @@
|
||||
from typing import Optional, Any, List
|
||||
import csv
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import (
|
||||
KnowledgeType,
|
||||
Knowledge,
|
||||
ChunkStrategy,
|
||||
DocumentType,
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -1,13 +1,14 @@
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import docx
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import (
|
||||
KnowledgeType,
|
||||
Knowledge,
|
||||
ChunkStrategy,
|
||||
DocumentType,
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
)
|
||||
import docx
|
||||
|
||||
|
||||
class DocxKnowledge(Knowledge):
|
||||
|
@@ -1,7 +1,6 @@
|
||||
from typing import Optional
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
from dbgpt.rag.knowledge.base import KnowledgeType, Knowledge
|
||||
from dbgpt.rag.knowledge.base import Knowledge, KnowledgeType
|
||||
from dbgpt.rag.knowledge.string import StringKnowledge
|
||||
from dbgpt.rag.knowledge.url import URLKnowledge
|
||||
|
||||
@@ -32,11 +31,21 @@ class KnowledgeFactory:
|
||||
Args:
|
||||
datasource: path of the file to convert
|
||||
knowledge_type: type of knowledge
|
||||
Example:
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: python
|
||||
>>> from dbgpt.rag.knowledge.factory import KnowledgeFactory
|
||||
>>> url_knowlege = KnowledgeFactory.create(datasource="https://www.baidu.com", knowledge_type=KnowledgeType.URL)
|
||||
>>> doc_knowlege = KnowledgeFactory.create(datasource="path/to/document.pdf", knowledge_type=KnowledgeType.DOCUMENT)
|
||||
|
||||
from dbgpt.rag.knowledge.factory import KnowledgeFactory
|
||||
|
||||
url_knowlege = KnowledgeFactory.create(
|
||||
datasource="https://www.baidu.com", knowledge_type=KnowledgeType.URL
|
||||
)
|
||||
doc_knowlege = KnowledgeFactory.create(
|
||||
datasource="path/to/document.pdf",
|
||||
knowledge_type=KnowledgeType.DOCUMENT,
|
||||
)
|
||||
|
||||
"""
|
||||
match knowledge_type:
|
||||
case KnowledgeType.DOCUMENT:
|
||||
@@ -57,13 +66,22 @@ class KnowledgeFactory:
|
||||
knowledge_type: Optional[KnowledgeType] = KnowledgeType.DOCUMENT,
|
||||
) -> Knowledge:
|
||||
"""Create knowledge from path
|
||||
|
||||
Args:
|
||||
param file_path: path of the file to convert
|
||||
param knowledge_type: type of knowledge
|
||||
Example:
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: python
|
||||
>>> from dbgpt.rag.knowledge.factory import KnowledgeFactory
|
||||
>>> doc_knowlege = KnowledgeFactory.create(datasource="path/to/document.pdf", knowledge_type=KnowledgeType.DOCUMENT)
|
||||
|
||||
from dbgpt.rag.knowledge.factory import KnowledgeFactory
|
||||
|
||||
doc_knowlege = KnowledgeFactory.create(
|
||||
datasource="path/to/document.pdf",
|
||||
knowledge_type=KnowledgeType.DOCUMENT,
|
||||
)
|
||||
|
||||
"""
|
||||
factory = cls(file_path=file_path, knowledge_type=knowledge_type)
|
||||
return factory._select_document_knowledge(
|
||||
@@ -76,13 +94,21 @@ class KnowledgeFactory:
|
||||
knowledge_type: Optional[KnowledgeType] = KnowledgeType.URL,
|
||||
) -> Knowledge:
|
||||
"""Create knowledge from url
|
||||
|
||||
Args:
|
||||
param url: url of the file to convert
|
||||
param knowledge_type: type of knowledge
|
||||
Example:
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: python
|
||||
>>> from dbgpt.rag.knowledge.factory import KnowledgeFactory
|
||||
>>> url_knowlege = KnowledgeFactory.create(datasource="https://www.baidu.com", knowledge_type=KnowledgeType.URL)
|
||||
|
||||
from dbgpt.rag.knowledge.factory import KnowledgeFactory
|
||||
|
||||
url_knowlege = KnowledgeFactory.create(
|
||||
datasource="https://www.baidu.com", knowledge_type=KnowledgeType.URL
|
||||
)
|
||||
|
||||
"""
|
||||
return URLKnowledge(
|
||||
url=url,
|
||||
@@ -130,14 +156,14 @@ class KnowledgeFactory:
|
||||
def _get_knowledge_subclasses() -> List[Knowledge]:
|
||||
"""get all knowledge subclasses"""
|
||||
from dbgpt.rag.knowledge.base import Knowledge
|
||||
from dbgpt.rag.knowledge.pdf import PDFKnowledge
|
||||
from dbgpt.rag.knowledge.docx import DocxKnowledge
|
||||
from dbgpt.rag.knowledge.markdown import MarkdownKnowledge
|
||||
from dbgpt.rag.knowledge.csv import CSVKnowledge
|
||||
from dbgpt.rag.knowledge.txt import TXTKnowledge
|
||||
from dbgpt.rag.knowledge.pptx import PPTXKnowledge
|
||||
from dbgpt.rag.knowledge.docx import DocxKnowledge
|
||||
from dbgpt.rag.knowledge.html import HTMLKnowledge
|
||||
from dbgpt.rag.knowledge.url import URLKnowledge
|
||||
from dbgpt.rag.knowledge.markdown import MarkdownKnowledge
|
||||
from dbgpt.rag.knowledge.pdf import PDFKnowledge
|
||||
from dbgpt.rag.knowledge.pptx import PPTXKnowledge
|
||||
from dbgpt.rag.knowledge.string import StringKnowledge
|
||||
from dbgpt.rag.knowledge.txt import TXTKnowledge
|
||||
from dbgpt.rag.knowledge.url import URLKnowledge
|
||||
|
||||
return Knowledge.__subclasses__()
|
||||
|
@@ -1,13 +1,13 @@
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import chardet
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import (
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
ChunkStrategy,
|
||||
DocumentType,
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -1,11 +1,11 @@
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import (
|
||||
KnowledgeType,
|
||||
Knowledge,
|
||||
ChunkStrategy,
|
||||
DocumentType,
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -1,11 +1,11 @@
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import (
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
ChunkStrategy,
|
||||
DocumentType,
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -1,11 +1,11 @@
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import (
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
ChunkStrategy,
|
||||
DocumentType,
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -1,7 +1,7 @@
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import KnowledgeType, Knowledge, ChunkStrategy
|
||||
from dbgpt.rag.knowledge.base import ChunkStrategy, Knowledge, KnowledgeType
|
||||
|
||||
|
||||
class StringKnowledge(Knowledge):
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, mock_open, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from dbgpt.rag.knowledge.csv import CSVKnowledge
|
||||
|
||||
MOCK_CSV_DATA = "id,name,age\n1,John Doe,30\n2,Jane Smith,25\n3,Bob Johnson,40"
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from dbgpt.rag.knowledge.docx import DocxKnowledge
|
||||
|
||||
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import pytest
|
||||
from unittest.mock import mock_open, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from dbgpt.rag.knowledge.html import HTMLKnowledge
|
||||
|
||||
MOCK_HTML_CONTENT = b"""
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import pytest
|
||||
from unittest.mock import mock_open, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from dbgpt.rag.knowledge.markdown import MarkdownKnowledge
|
||||
|
||||
MOCK_MARKDOWN_DATA = """# Header 1
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from unittest.mock import MagicMock, mock_open, patch
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch, mock_open
|
||||
|
||||
from dbgpt.rag.knowledge.pdf import PDFKnowledge
|
||||
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import pytest
|
||||
from unittest.mock import mock_open, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from dbgpt.rag.knowledge.txt import TXTKnowledge
|
||||
|
||||
MOCK_TXT_CONTENT = b"Sample text content for testing.\nAnother line of text."
|
||||
|
@@ -1,13 +1,13 @@
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import chardet
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import (
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
ChunkStrategy,
|
||||
DocumentType,
|
||||
Knowledge,
|
||||
KnowledgeType,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -1,7 +1,7 @@
|
||||
from typing import Optional, Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.rag.chunk import Document
|
||||
from dbgpt.rag.knowledge.base import KnowledgeType, Knowledge, ChunkStrategy
|
||||
from dbgpt.rag.knowledge.base import ChunkStrategy, Knowledge, KnowledgeType
|
||||
|
||||
|
||||
class URLKnowledge(Knowledge):
|
||||
|
Reference in New Issue
Block a user