refactor: Refactor proxy LLM (#1064)

This commit is contained in:
Fangyin Cheng
2024-01-14 21:01:37 +08:00
committed by GitHub
parent a035433170
commit 22bfd01c4b
95 changed files with 2049 additions and 1294 deletions

View File

@@ -1,14 +1,14 @@
from abc import abstractmethod, ABC
from abc import ABC, abstractmethod
from enum import Enum
from typing import Optional, Any, List
from typing import Any, List, Optional
from dbgpt.rag.chunk import Document
from dbgpt.rag.text_splitter.text_splitter import (
RecursiveCharacterTextSplitter,
MarkdownHeaderTextSplitter,
ParagraphTextSplitter,
CharacterTextSplitter,
MarkdownHeaderTextSplitter,
PageTextSplitter,
ParagraphTextSplitter,
RecursiveCharacterTextSplitter,
SeparatorTextSplitter,
)

View File

@@ -1,11 +1,12 @@
from typing import Optional, Any, List
import csv
from typing import Any, List, Optional
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import (
KnowledgeType,
Knowledge,
ChunkStrategy,
DocumentType,
Knowledge,
KnowledgeType,
)

View File

@@ -1,13 +1,14 @@
from typing import Optional, Any, List
from typing import Any, List, Optional
import docx
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import (
KnowledgeType,
Knowledge,
ChunkStrategy,
DocumentType,
Knowledge,
KnowledgeType,
)
import docx
class DocxKnowledge(Knowledge):

View File

@@ -1,7 +1,6 @@
from typing import Optional
from typing import List
from typing import List, Optional
from dbgpt.rag.knowledge.base import KnowledgeType, Knowledge
from dbgpt.rag.knowledge.base import Knowledge, KnowledgeType
from dbgpt.rag.knowledge.string import StringKnowledge
from dbgpt.rag.knowledge.url import URLKnowledge
@@ -32,11 +31,21 @@ class KnowledgeFactory:
Args:
datasource: path of the file to convert
knowledge_type: type of knowledge
Example:
Examples:
.. code-block:: python
>>> from dbgpt.rag.knowledge.factory import KnowledgeFactory
>>> url_knowledge = KnowledgeFactory.create(datasource="https://www.baidu.com", knowledge_type=KnowledgeType.URL)
>>> doc_knowledge = KnowledgeFactory.create(datasource="path/to/document.pdf", knowledge_type=KnowledgeType.DOCUMENT)
from dbgpt.rag.knowledge.factory import KnowledgeFactory
url_knowledge = KnowledgeFactory.create(
datasource="https://www.baidu.com", knowledge_type=KnowledgeType.URL
)
doc_knowledge = KnowledgeFactory.create(
datasource="path/to/document.pdf",
knowledge_type=KnowledgeType.DOCUMENT,
)
"""
match knowledge_type:
case KnowledgeType.DOCUMENT:
@@ -57,13 +66,22 @@ class KnowledgeFactory:
knowledge_type: Optional[KnowledgeType] = KnowledgeType.DOCUMENT,
) -> Knowledge:
"""Create knowledge from path
Args:
file_path: path of the file to convert
knowledge_type: type of knowledge
Example:
Examples:
.. code-block:: python
>>> from dbgpt.rag.knowledge.factory import KnowledgeFactory
>>> doc_knowledge = KnowledgeFactory.create(datasource="path/to/document.pdf", knowledge_type=KnowledgeType.DOCUMENT)
from dbgpt.rag.knowledge.factory import KnowledgeFactory
doc_knowledge = KnowledgeFactory.create(
datasource="path/to/document.pdf",
knowledge_type=KnowledgeType.DOCUMENT,
)
"""
factory = cls(file_path=file_path, knowledge_type=knowledge_type)
return factory._select_document_knowledge(
@@ -76,13 +94,21 @@ class KnowledgeFactory:
knowledge_type: Optional[KnowledgeType] = KnowledgeType.URL,
) -> Knowledge:
"""Create knowledge from url
Args:
url: url of the file to convert
knowledge_type: type of knowledge
Example:
Examples:
.. code-block:: python
>>> from dbgpt.rag.knowledge.factory import KnowledgeFactory
>>> url_knowledge = KnowledgeFactory.create(datasource="https://www.baidu.com", knowledge_type=KnowledgeType.URL)
from dbgpt.rag.knowledge.factory import KnowledgeFactory
url_knowledge = KnowledgeFactory.create(
datasource="https://www.baidu.com", knowledge_type=KnowledgeType.URL
)
"""
return URLKnowledge(
url=url,
@@ -130,14 +156,14 @@ class KnowledgeFactory:
def _get_knowledge_subclasses() -> List[Knowledge]:
"""get all knowledge subclasses"""
from dbgpt.rag.knowledge.base import Knowledge
from dbgpt.rag.knowledge.pdf import PDFKnowledge
from dbgpt.rag.knowledge.docx import DocxKnowledge
from dbgpt.rag.knowledge.markdown import MarkdownKnowledge
from dbgpt.rag.knowledge.csv import CSVKnowledge
from dbgpt.rag.knowledge.txt import TXTKnowledge
from dbgpt.rag.knowledge.pptx import PPTXKnowledge
from dbgpt.rag.knowledge.docx import DocxKnowledge
from dbgpt.rag.knowledge.html import HTMLKnowledge
from dbgpt.rag.knowledge.url import URLKnowledge
from dbgpt.rag.knowledge.markdown import MarkdownKnowledge
from dbgpt.rag.knowledge.pdf import PDFKnowledge
from dbgpt.rag.knowledge.pptx import PPTXKnowledge
from dbgpt.rag.knowledge.string import StringKnowledge
from dbgpt.rag.knowledge.txt import TXTKnowledge
from dbgpt.rag.knowledge.url import URLKnowledge
return Knowledge.__subclasses__()

View File

@@ -1,13 +1,13 @@
from typing import Optional, Any, List
from typing import Any, List, Optional
import chardet
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import (
Knowledge,
KnowledgeType,
ChunkStrategy,
DocumentType,
Knowledge,
KnowledgeType,
)

View File

@@ -1,11 +1,11 @@
from typing import Optional, Any, List
from typing import Any, List, Optional
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import (
KnowledgeType,
Knowledge,
ChunkStrategy,
DocumentType,
Knowledge,
KnowledgeType,
)

View File

@@ -1,11 +1,11 @@
from typing import Optional, Any, List
from typing import Any, List, Optional
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import (
Knowledge,
KnowledgeType,
ChunkStrategy,
DocumentType,
Knowledge,
KnowledgeType,
)

View File

@@ -1,11 +1,11 @@
from typing import Optional, Any, List
from typing import Any, List, Optional
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import (
Knowledge,
KnowledgeType,
ChunkStrategy,
DocumentType,
Knowledge,
KnowledgeType,
)

View File

@@ -1,7 +1,7 @@
from typing import Optional, Any, List
from typing import Any, List, Optional
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import KnowledgeType, Knowledge, ChunkStrategy
from dbgpt.rag.knowledge.base import ChunkStrategy, Knowledge, KnowledgeType
class StringKnowledge(Knowledge):

View File

@@ -1,6 +1,7 @@
import pytest
from unittest.mock import MagicMock, mock_open, patch
import pytest
from dbgpt.rag.knowledge.csv import CSVKnowledge
MOCK_CSV_DATA = "id,name,age\n1,John Doe,30\n2,Jane Smith,25\n3,Bob Johnson,40"

View File

@@ -1,6 +1,7 @@
import pytest
from unittest.mock import MagicMock, patch
import pytest
from dbgpt.rag.knowledge.docx import DocxKnowledge

View File

@@ -1,6 +1,7 @@
import pytest
from unittest.mock import mock_open, patch
import pytest
from dbgpt.rag.knowledge.html import HTMLKnowledge
MOCK_HTML_CONTENT = b"""

View File

@@ -1,6 +1,7 @@
import pytest
from unittest.mock import mock_open, patch
import pytest
from dbgpt.rag.knowledge.markdown import MarkdownKnowledge
MOCK_MARKDOWN_DATA = """# Header 1

View File

@@ -1,5 +1,6 @@
from unittest.mock import MagicMock, mock_open, patch
import pytest
from unittest.mock import MagicMock, patch, mock_open
from dbgpt.rag.knowledge.pdf import PDFKnowledge

View File

@@ -1,6 +1,7 @@
import pytest
from unittest.mock import mock_open, patch
import pytest
from dbgpt.rag.knowledge.txt import TXTKnowledge
MOCK_TXT_CONTENT = b"Sample text content for testing.\nAnother line of text."

View File

@@ -1,13 +1,13 @@
from typing import Optional, Any, List
from typing import Any, List, Optional
import chardet
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import (
Knowledge,
KnowledgeType,
ChunkStrategy,
DocumentType,
Knowledge,
KnowledgeType,
)

View File

@@ -1,7 +1,7 @@
from typing import Optional, Any, List
from typing import Any, List, Optional
from dbgpt.rag.chunk import Document
from dbgpt.rag.knowledge.base import KnowledgeType, Knowledge, ChunkStrategy
from dbgpt.rag.knowledge.base import ChunkStrategy, Knowledge, KnowledgeType
class URLKnowledge(Knowledge):