chore: Add pylint for DB-GPT rag lib (#1267)

Author: Fangyin Cheng (committed by GitHub)
Date: 2024-03-07 23:27:43 +08:00
Parent: aaaf34db17
Commit: 7446817340
70 changed files with 1135 additions and 587 deletions

dbgpt/rag/extractor/__init__.py (new file)

@@ -0,0 +1,5 @@
+"""Module for extracting information."""
+from .base import Extractor
+from .summary import SummaryExtractor
+
+__all__ = ["Extractor", "SummaryExtractor"]
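With these re-exports in place, downstream code can pull both names straight from the package; a one-line sketch of the resulting import surface, assuming the dbgpt/rag/extractor package path implied by the relative imports above:

    from dbgpt.rag.extractor import Extractor, SummaryExtractor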

dbgpt/rag/extractor/base.py

@@ -1,3 +1,4 @@
+"""Base extractor class."""
 from abc import ABC, abstractmethod
 from typing import List
@@ -6,14 +7,18 @@ from dbgpt.rag.chunk import Chunk
 class Extractor(ABC):
-    """Extractor Base class, it's apply for Summary Extractor, Keyword Extractor, Triplets Extractor, Question Extractor, etc."""
+    """Base extractor class.
+
+    It applies to the Summary Extractor, Keyword Extractor, Triplets Extractor,
+    Question Extractor, etc.
+    """

     def __init__(self, llm_client: LLMClient) -> None:
         """Initialize the Extractor."""
         self._llm_client = llm_client

     def extract(self, chunks: List[Chunk]) -> str:
-        """Extracts chunks.
+        """Return extracted metadata from chunks.

         Args:
             chunks (List[Chunk]): extract metadata from chunks
@@ -30,7 +35,7 @@ class Extractor(ABC):
     @abstractmethod
     def _extract(self, chunks: List[Chunk]) -> str:
-        """Extracts chunks.
+        """Return extracted metadata from chunks.

         Args:
             chunks (List[Chunk]): extract metadata from chunks

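To make the abstract contract concrete, here is a hedged sketch of a custom subclass; KeywordExtractor and its word-counting logic are hypothetical, and if the full base class also declares an async counterpart (e.g. _aextract), that would need to be overridden as well:

    from typing import List

    from dbgpt.rag.chunk import Chunk

    from .base import Extractor


    class KeywordExtractor(Extractor):
        """Hypothetical extractor illustrating the subclass contract."""

        def _extract(self, chunks: List[Chunk]) -> str:
            # Naive stand-in logic: a real implementation would prompt
            # self._llm_client instead of counting words.
            words = " ".join(chunk.content for chunk in chunks).split()
            return ", ".join(sorted(set(words))[:10])

The inherited __init__ already stores the client as self._llm_client, so a subclass only has to supply the extraction step itself.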
dbgpt/rag/extractor/summary.py

@@ -1,3 +1,5 @@
+"""Summary extractor that extracts a document summary."""
+
 from typing import List, Optional

 from dbgpt._private.llm_metadata import LLMMetadata
@@ -13,16 +15,21 @@ SUMMARY_PROMPT_TEMPLATE_ZH = """请根据提供的上下文信息的进行精简
 """

 SUMMARY_PROMPT_TEMPLATE_EN = """
 Write a quick summary of the following context:
 {context}
-the summary should be as concise as possible and not overly lengthy.Please keep the answer within approximately 200 characters.
+the summary should be as concise as possible and not overly lengthy. Please keep the
+answer within approximately 200 characters.
 """

-REFINE_SUMMARY_TEMPLATE_ZH = """我们已经提供了一个到某一点的现有总结:{context}\n 请根据你之前推理的内容进行总结,总结回答的时候最好按照1.2.3.进行. 注意:请用<中文>来进行总结。"""
+REFINE_SUMMARY_TEMPLATE_ZH = """我们已经提供了一个到某一点的现有总结:{context}
+请根据你之前推理的内容进行总结,总结回答的时候最好按照1.2.3.进行. 注意:请用<中文>来进行总结。
+"""

 REFINE_SUMMARY_TEMPLATE_EN = """
-We have provided an existing summary up to a certain point: {context}, We have the opportunity to refine the existing summary (only if needed) with some more context below.
-\nBased on the previous reasoning, please summarize the final conclusion in accordance with points 1.2.and 3.
+We have provided an existing summary up to a certain point: {context}. We have the
+opportunity to refine the existing summary (only if needed) with some more context
+below. \nBased on the previous reasoning, please summarize the final conclusion in
+accordance with points 1, 2, and 3.
 """
@@ -31,18 +38,29 @@ class SummaryExtractor(Extractor):
     def __init__(
         self,
-        llm_client: Optional[LLMClient],
-        model_name: Optional[str] = None,
+        llm_client: LLMClient,
+        model_name: str,
         llm_metadata: Optional[LLMMetadata] = None,
         language: Optional[str] = "en",
-        max_iteration_with_llm: Optional[int] = 5,
-        concurrency_limit_with_llm: Optional[int] = 3,
+        max_iteration_with_llm: int = 5,
+        concurrency_limit_with_llm: int = 3,
     ):
+        """Create SummaryExtractor.
+
+        Args:
+            llm_client: (LLMClient): The LLM client.
+            model_name: (str): The name of the model used for summarization.
+            llm_metadata: (Optional[LLMMetadata]): The LLM metadata. Defaults to None.
+            language: (Optional[str]): The language of the prompt. Defaults to "en".
+            max_iteration_with_llm: (int): The max number of iterations with the
+                llm. Defaults to 5.
+            concurrency_limit_with_llm: (int): The concurrency limit for llm calls.
+                Defaults to 3.
+        """
         self._llm_client = llm_client
         self._model_name = model_name
-        self.llm_metadata = llm_metadata or LLMMetadata
+        self.llm_metadata = llm_metadata
         self._language = language
-        self._concurrency_limit_with_llm = concurrency_limit_with_llm
         self._prompt_template = (
             SUMMARY_PROMPT_TEMPLATE_EN
             if language == "en"
@@ -55,23 +73,15 @@ class SummaryExtractor(Extractor):
         )
         self._concurrency_limit_with_llm = concurrency_limit_with_llm
         self._max_iteration_with_llm = max_iteration_with_llm
         self._concurrency_limit_with_llm = concurrency_limit_with_llm
-        """Initialize the Extractor.
-        Args:
-            llm_client: (Optional[LLMClient]): The LLM client. Defaults to None.
-            model_name: str
-            llm_metadata: LLMMetadata
-            language: (Optional[str]): The language of the prompt. Defaults to "en".
-            max_iteration_with_llm: (Optional[int]): The max iteration with llm. Defaults to 5.
-            concurrency_limit_with_llm: (Optional[int]): The concurrency limit with llm. Defaults to 3.
-        """

     async def _aextract(self, chunks: List[Chunk]) -> str:
-        """async document extract summary
+        """Return extracted metadata from chunks, asynchronously.
+
         Args:
-            - model_name: str
-            - chunk_docs: List[Document]
+            chunks (List[Chunk]): extract metadata from chunks
+
         Returns:
             str: The summary of the documents.
         """
         texts = [doc.content for doc in chunks]
         from dbgpt.util.prompt_util import PromptHelper
@@ -95,9 +105,13 @@ class SummaryExtractor(Extractor):
         return summary_outs[0]

     def _extract(self, chunks: List[Chunk]) -> str:
-        """document extract summary
+        """Return the summary of the documents.
+
         Args:
-            - chunk_docs: List[Document]
+            chunks (List[Chunk]): list of chunks
+
         Returns:
             summary: str
         """
         loop = utils.get_or_create_event_loop()
         return loop.run_until_complete(self._aextract(chunks=chunks))
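Taken together, a hedged usage sketch of this public entry point; the OpenAILLMClient import path and the model name are assumptions, not taken from this diff:

    from dbgpt.model.proxy import OpenAILLMClient  # assumed client location
    from dbgpt.rag.chunk import Chunk
    from dbgpt.rag.extractor import SummaryExtractor

    extractor = SummaryExtractor(
        llm_client=OpenAILLMClient(),  # any LLMClient implementation should work
        model_name="gpt-3.5-turbo",    # illustrative model name
        language="en",
    )
    chunks = [Chunk(content="DB-GPT is an AI-native data app framework.")]
    summary = extractor.extract(chunks)  # drives the async pipeline to completion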
@@ -106,7 +120,10 @@ class SummaryExtractor(Extractor):
         self,
         docs: List[str],
     ) -> str:
-        """Extract summary by mapreduce mode
+        """Return the summary of the documents.
+
+        Extract the summary in map-reduce mode.
+
         map -> multi async call llm to generate summary
         reduce -> merge the summaries by map process

         Args:
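The map-reduce flow described above can be pictured with a small self-contained sketch; _call_llm is a stand-in for a single model call, not a DB-GPT API:

    import asyncio
    from typing import List


    async def _call_llm(text: str) -> str:
        # Stand-in for one summarization request to the model.
        return text[:200]


    async def mapreduce_summary(docs: List[str]) -> str:
        # map: summarize every document chunk concurrently
        partials = await asyncio.gather(*(_call_llm(d) for d in docs))
        # reduce: merge the partial summaries and summarize once more
        return await _call_llm("\n".join(partials))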
@@ -132,10 +149,12 @@ class SummaryExtractor(Extractor):
     async def _llm_run_tasks(
         self, chunk_texts: List[str], prompt_template: str
     ) -> List[str]:
-        """llm run tasks
+        """Run llm tasks.
+
         Args:
             chunk_texts: List[str]
             prompt_template: str
+
         Returns:
             summary_outs: List[str]
         """
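A sketch of how such a task runner can bound concurrency in the spirit of concurrency_limit_with_llm; the helper below is illustrative and uses a placeholder in place of the real client call:

    import asyncio
    from typing import List


    async def llm_run_tasks(
        chunk_texts: List[str], prompt_template: str, limit: int = 3
    ) -> List[str]:
        semaphore = asyncio.Semaphore(limit)

        async def one_task(text: str) -> str:
            async with semaphore:  # at most `limit` calls in flight at once
                prompt = prompt_template.format(context=text)
                return prompt[:200]  # placeholder for the model's summary

        return await asyncio.gather(*(one_task(t) for t in chunk_texts))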