feat(core): Support i18n (#1327)
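The change wires DB-GPT's AWEL operator metadata into gettext-style internationalization: each touched module imports a `_` helper from `dbgpt.util.i18n_utils` and wraps user-visible strings (operator labels, descriptions, parameter and field names) so they can be extracted into message catalogs and translated at runtime. As a rough, non-authoritative sketch of how such a `_` helper is commonly built on Python's standard `gettext` module (the catalog domain, locale directory, and LANGUAGE lookup below are illustrative assumptions, not DB-GPT's actual configuration):

# Hedged sketch of a gettext-backed "_" helper, in the spirit of
# dbgpt.util.i18n_utils._ used throughout this diff. The domain name,
# locale directory, and environment lookup are assumptions for illustration.
import gettext
import os

_DOMAIN = "dbgpt_core"  # hypothetical message-catalog name
_LOCALE_DIR = os.path.join(os.path.dirname(__file__), "locales")

try:
    _translations = gettext.translation(
        _DOMAIN,
        localedir=_LOCALE_DIR,
        languages=[os.environ.get("LANGUAGE", "en")],
    )
except OSError:
    # No compiled catalog found: fall back to identity translation,
    # so _("Embedding Retriever Operator") returns the English text.
    _translations = gettext.NullTranslations()

_ = _translations.gettext

# Strings wrapped with _() in the operator metadata are resolved
# against the active catalog when the module is imported:
print(_("Embedding Retriever Operator"))

The diff itself only marks the strings; turning them into translations is the usual gettext workflow of extracting marked strings into .po files and compiling per-language .mo catalogs.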
@@ -10,7 +10,7 @@ from .embedding import (  # noqa: F401
     EmbeddingRetrieverOperator,
 )
 from .evaluation import RetrieverEvaluatorOperator  # noqa: F401
-from .knowledge import KnowledgeOperator  # noqa: F401
+from .knowledge import ChunksToStringOperator, KnowledgeOperator  # noqa: F401
 from .rerank import RerankOperator  # noqa: F401
 from .rewrite import QueryRewriteOperator  # noqa: F401
 from .summary import SummaryAssemblerOperator  # noqa: F401
@@ -22,6 +22,7 @@ __all__ = [
     "EmbeddingRetrieverOperator",
     "EmbeddingAssemblerOperator",
     "KnowledgeOperator",
+    "ChunksToStringOperator",
     "RerankOperator",
     "QueryRewriteOperator",
     "SummaryAssemblerOperator",

@@ -4,8 +4,10 @@ from functools import reduce
 from typing import List, Optional, Union
 
 from dbgpt.core import Chunk
+from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata
 from dbgpt.core.interface.operators.retriever import RetrieverOperator
 from dbgpt.storage.vector_store.connector import VectorStoreConnector
+from dbgpt.util.i18n_utils import _
 
 from ..assembler.embedding import EmbeddingAssembler
 from ..chunk_manager import ChunkParameters
@@ -19,6 +21,71 @@ from .assembler import AssemblerOperator
 class EmbeddingRetrieverOperator(RetrieverOperator[Union[str, List[str]], List[Chunk]]):
     """The Embedding Retriever Operator."""
 
+    metadata = ViewMetadata(
+        label=_("Embedding Retriever Operator"),
+        name="embedding_retriever_operator",
+        description=_("Retrieve candidates from vector store."),
+        category=OperatorCategory.RAG,
+        parameters=[
+            Parameter.build_from(
+                _("Vector Store Connector"),
+                "vector_store_connector",
+                VectorStoreConnector,
+                description=_("The vector store connector."),
+            ),
+            Parameter.build_from(
+                _("Top K"),
+                "top_k",
+                int,
+                description=_("The number of candidates."),
+            ),
+            Parameter.build_from(
+                _("Score Threshold"),
+                "score_threshold",
+                float,
+                description=_(
+                    "The score threshold, if score of candidate is less than it, it "
+                    "will be filtered."
+                ),
+                optional=True,
+                default=0.3,
+            ),
+            Parameter.build_from(
+                _("Query Rewrite"),
+                "query_rewrite",
+                QueryRewrite,
+                description=_("The query rewrite resource."),
+                optional=True,
+                default=None,
+            ),
+            Parameter.build_from(
+                _("Rerank"),
+                "rerank",
+                Ranker,
+                description=_("The rerank."),
+                optional=True,
+                default=None,
+            ),
+        ],
+        inputs=[
+            IOField.build_from(
+                _("Query"),
+                "query",
+                str,
+                description=_("The query to retrieve."),
+            )
+        ],
+        outputs=[
+            IOField.build_from(
+                _("Candidates"),
+                "candidates",
+                Chunk,
+                description=_("The retrieved candidates."),
+                is_list=True,
+            )
+        ],
+    )
+
     def __init__(
         self,
         vector_store_connector: VectorStoreConnector,
@@ -53,12 +120,52 @@ class EmbeddingRetrieverOperator(RetrieverOperator[Union[str, List[str]], List[Chunk]]):
 class EmbeddingAssemblerOperator(AssemblerOperator[Knowledge, List[Chunk]]):
     """The Embedding Assembler Operator."""
 
+    metadata = ViewMetadata(
+        label=_("Embedding Assembler Operator"),
+        name="embedding_assembler_operator",
+        description=_("Load knowledge and assemble embedding chunks to vector store."),
+        category=OperatorCategory.RAG,
+        parameters=[
+            Parameter.build_from(
+                _("Vector Store Connector"),
+                "vector_store_connector",
+                VectorStoreConnector,
+                description=_("The vector store connector."),
+            ),
+            Parameter.build_from(
+                _("Chunk Parameters"),
+                "chunk_parameters",
+                ChunkParameters,
+                description=_("The chunk parameters."),
+                optional=True,
+                default=None,
+            ),
+        ],
+        inputs=[
+            IOField.build_from(
+                _("Knowledge"),
+                "knowledge",
+                Knowledge,
+                description=_("The knowledge to be loaded."),
+            )
+        ],
+        outputs=[
+            IOField.build_from(
+                _("Chunks"),
+                "chunks",
+                Chunk,
+                description=_(
+                    "The assembled chunks, it has been persisted to vector " "store."
+                ),
+                is_list=True,
+            )
+        ],
+    )
+
     def __init__(
         self,
         vector_store_connector: VectorStoreConnector,
-        chunk_parameters: Optional[ChunkParameters] = ChunkParameters(
-            chunk_strategy="CHUNK_BY_SIZE"
-        ),
+        chunk_parameters: Optional[ChunkParameters] = None,
         **kwargs
     ):
         """Create a new EmbeddingAssemblerOperator.
@@ -68,6 +175,8 @@ class EmbeddingAssemblerOperator(AssemblerOperator[Knowledge, List[Chunk]]):
             chunk_parameters (Optional[ChunkParameters], optional): The chunk
                 parameters. Defaults to ChunkParameters(chunk_strategy="CHUNK_BY_SIZE").
         """
+        if not chunk_parameters:
+            chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")
         self._chunk_parameters = chunk_parameters
         self._vector_store_connector = vector_store_connector
         super().__init__(**kwargs)

@@ -1,7 +1,8 @@
 """Knowledge Operator."""
 
-from typing import Optional
+from typing import List, Optional
 
+from dbgpt.core import Chunk
 from dbgpt.core.awel import MapOperator
 from dbgpt.core.awel.flow import (
     IOField,
@@ -12,44 +13,47 @@ from dbgpt.core.awel.flow import (
 )
 from dbgpt.rag.knowledge.base import Knowledge, KnowledgeType
 from dbgpt.rag.knowledge.factory import KnowledgeFactory
+from dbgpt.util.i18n_utils import _
 
 
 class KnowledgeOperator(MapOperator[str, Knowledge]):
     """Knowledge Factory Operator."""
 
     metadata = ViewMetadata(
-        label="Knowledge Factory Operator",
+        label=_("Knowledge Operator"),
         name="knowledge_operator",
         category=OperatorCategory.RAG,
-        description="The knowledge operator.",
+        description=_(
+            _("The knowledge operator, which can create knowledge from datasource.")
+        ),
         inputs=[
             IOField.build_from(
-                "knowledge datasource",
+                _("knowledge datasource"),
                 "knowledge datasource",
                 str,
-                "knowledge datasource",
+                _("knowledge datasource, which can be a document, url, or text."),
             )
         ],
         outputs=[
             IOField.build_from(
-                "Knowledge",
+                _("Knowledge"),
                 "Knowledge",
                 Knowledge,
-                description="Knowledge",
+                description=_("Knowledge object."),
             )
         ],
         parameters=[
             Parameter.build_from(
-                label="datasource",
+                label=_("Default datasource"),
                 name="datasource",
                 type=str,
                 optional=True,
-                default="DOCUMENT",
-                description="datasource",
+                default=None,
+                description=_("Default datasource."),
             ),
             Parameter.build_from(
-                label="knowledge_type",
-                name="knowledge type",
+                label=_("Knowledge type"),
+                name="knowledge_type",
                 type=str,
                 optional=True,
                 options=[
@@ -64,7 +68,7 @@ class KnowledgeOperator(MapOperator[str, Knowledge]):
                     ),
                 ],
                 default=KnowledgeType.DOCUMENT.name,
-                description="knowledge type",
+                description=_("Knowledge type."),
             ),
         ],
         documentation_url="https://github.com/openai/openai-python",
@@ -92,3 +96,50 @@ class KnowledgeOperator(MapOperator[str, Knowledge]):
         return await self.blocking_func_to_async(
             KnowledgeFactory.create, datasource, self._knowledge_type
         )
+
+
+class ChunksToStringOperator(MapOperator[List[Chunk], str]):
+    """The Chunks To String Operator."""
+
+    metadata = ViewMetadata(
+        label=_("Chunks To String Operator"),
+        name="chunks_to_string_operator",
+        description=_("Convert chunks to string."),
+        category=OperatorCategory.RAG,
+        parameters=[
+            Parameter.build_from(
+                _("Separator"),
+                "separator",
+                str,
+                description=_("The separator between chunks."),
+                optional=True,
+                default="\n",
+            )
+        ],
+        inputs=[
+            IOField.build_from(
+                _("Chunks"),
+                "chunks",
+                Chunk,
+                description=_("The input chunks."),
+                is_list=True,
+            )
+        ],
+        outputs=[
+            IOField.build_from(
+                _("String"),
+                "string",
+                str,
+                description=_("The output string."),
+            )
+        ],
+    )
+
+    def __init__(self, separator: str = "\n", **kwargs):
+        """Create a new ChunksToStringOperator."""
+        self._separator = separator
+        super().__init__(**kwargs)
+
+    async def map(self, chunks: List[Chunk]) -> str:
+        """Map the chunks to string."""
+        return self._separator.join([chunk.content for chunk in chunks])

@@ -6,58 +6,61 @@ from dbgpt.core import LLMClient
 from dbgpt.core.awel import MapOperator
 from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata
 from dbgpt.rag.retriever.rewrite import QueryRewrite
+from dbgpt.util.i18n_utils import _
 
 
 class QueryRewriteOperator(MapOperator[dict, Any]):
     """The Rewrite Operator."""
 
     metadata = ViewMetadata(
-        label="Query Rewrite Operator",
+        label=_("Query Rewrite Operator"),
         name="query_rewrite_operator",
         category=OperatorCategory.RAG,
-        description="query rewrite operator.",
+        description=_("Query rewrite operator."),
         inputs=[
-            IOField.build_from("query_context", "query_context", dict, "query context")
+            IOField.build_from(
+                _("Query context"), "query_context", dict, _("query context")
+            )
         ],
         outputs=[
             IOField.build_from(
-                "rewritten queries",
+                _("Rewritten queries"),
                 "queries",
                 str,
                 is_list=True,
-                description="rewritten queries",
+                description=_("Rewritten queries"),
             )
         ],
         parameters=[
             Parameter.build_from(
-                "LLM Client",
+                _("LLM Client"),
                 "llm_client",
                 LLMClient,
-                description="The LLM Client.",
+                description=_("The LLM Client."),
             ),
             Parameter.build_from(
-                label="model name",
+                label=_("Model name"),
                 name="model_name",
                 type=str,
                 optional=True,
                 default="gpt-3.5-turbo",
-                description="llm model name",
+                description=_("LLM model name."),
             ),
             Parameter.build_from(
-                label="prompt language",
+                label=_("Prompt language"),
                 name="language",
                 type=str,
                 optional=True,
                 default="en",
-                description="prompt language",
+                description=_("Prompt language."),
             ),
             Parameter.build_from(
-                label="nums",
+                label=_("Number of results"),
                 name="nums",
                 type=int,
                 optional=True,
                 default=5,
-                description="rewrite query nums",
+                description=_("rewrite query number."),
             ),
         ],
         documentation_url="https://github.com/openai/openai-python",

@@ -7,24 +7,25 @@ from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata
 from dbgpt.rag.assembler.summary import SummaryAssembler
 from dbgpt.rag.knowledge.base import Knowledge
 from dbgpt.rag.operators.assembler import AssemblerOperator
+from dbgpt.util.i18n_utils import _
 
 
 class SummaryAssemblerOperator(AssemblerOperator[Any, Any]):
     """The summary assembler operator."""
 
     metadata = ViewMetadata(
-        label="Summary Operator",
+        label=_("Summary Operator"),
         name="summary_assembler_operator",
         category=OperatorCategory.RAG,
-        description="The summary assembler operator.",
+        description=_("The summary assembler operator."),
         inputs=[
             IOField.build_from(
-                "Knowledge", "knowledge", Knowledge, "knowledge datasource"
+                _("Knowledge"), "knowledge", Knowledge, _("Knowledge datasource")
             )
         ],
         outputs=[
             IOField.build_from(
-                "document summary",
+                _("Document summary"),
                 "summary",
                 str,
                 description="document summary",
@@ -32,44 +33,44 @@ class SummaryAssemblerOperator(AssemblerOperator[Any, Any]):
         ],
         parameters=[
             Parameter.build_from(
-                "LLM Client",
+                _("LLM Client"),
                 "llm_client",
                 LLMClient,
                 optional=True,
                 default=None,
-                description="The LLM Client.",
+                description=_("The LLM Client."),
             ),
             Parameter.build_from(
-                label="model name",
+                label=_("Model name"),
                 name="model_name",
                 type=str,
                 optional=True,
                 default="gpt-3.5-turbo",
-                description="llm model name",
+                description=_("LLM model name"),
            ),
             Parameter.build_from(
-                label="prompt language",
+                label=_("prompt language"),
                 name="language",
                 type=str,
                 optional=True,
                 default="en",
-                description="prompt language",
+                description=_("prompt language"),
             ),
             Parameter.build_from(
-                label="max_iteration_with_llm",
+                label=_("Max iteration with LLM"),
                 name="max_iteration_with_llm",
                 type=int,
                 optional=True,
                 default=5,
-                description="prompt language",
+                description=_("prompt language"),
             ),
             Parameter.build_from(
-                label="concurrency_limit_with_llm",
+                label=_("Concurrency limit with LLM"),
                 name="concurrency_limit_with_llm",
                 type=int,
                 optional=True,
                 default=3,
-                description="The concurrency limit with llm",
+                description=_("The concurrency limit with llm"),
             ),
         ],
         documentation_url="https://github.com/openai/openai-python",