fix: rag knowledge process workflow template (#2509)

Close #2277
# Description

Fix an error in the RAG knowledge process workflow template.

# How Has This Been Tested?

Ran the RAG knowledge process workflow template.

# Snapshots:

Include snapshots for easier review.

# Checklist:

- [ ] My code follows the style guidelines of this project
- [ ] I have already rebased the commits and made the commit messages
conform to the project standard.
- [ ] I have performed a self-review of my own code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] Any dependent changes have been merged and published in downstream
modules
This commit is contained in:
magic.chen 2025-03-24 11:04:11 +08:00 committed by GitHub
commit d13875caa9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 5828 additions and 6145 deletions

View File

@ -7,7 +7,7 @@ from typing import List, Optional
from pydantic import Field from pydantic import Field
from dbgpt.core import Chunk from dbgpt.core import Chunk, Embeddings
from dbgpt.storage.base import IndexStoreBase, IndexStoreConfig from dbgpt.storage.base import IndexStoreBase, IndexStoreConfig
from dbgpt.storage.graph_store.graph import Graph from dbgpt.storage.graph_store.graph import Graph
from dbgpt.util import RegisterParameters from dbgpt.util import RegisterParameters
@ -29,6 +29,11 @@ class KnowledgeGraphBase(IndexStoreBase, ABC):
def get_config(self) -> KnowledgeGraphConfig: def get_config(self) -> KnowledgeGraphConfig:
"""Get the knowledge graph config.""" """Get the knowledge graph config."""
@property
def embeddings(self) -> Embeddings:
    """Get the knowledge graph embeddings.

    Raises:
        NotImplementedError: Always raised by this implementation; a
            subclass that exposes embeddings must override this property.
    """
    # Name the concrete class so a missing override is easy to locate.
    raise NotImplementedError(
        f"{type(self).__name__} does not implement the `embeddings` property"
    )
@abstractmethod @abstractmethod
def query_graph(self, limit: Optional[int] = None) -> Graph: def query_graph(self, limit: Optional[int] = None) -> Graph:
"""Get graph data.""" """Get graph data."""

View File

@ -17,7 +17,7 @@ from dbgpt.util.i18n_utils import _
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_COMMON_PARAMETERS = [ _VECTOR_STORE_COMMON_PARAMETERS = [
Parameter.build_from( Parameter.build_from(
_("Collection Name"), _("Collection Name"),
"name", "name",
@ -28,6 +28,20 @@ _COMMON_PARAMETERS = [
optional=True, optional=True,
default="dbgpt_collection", default="dbgpt_collection",
), ),
Parameter.build_from(
_("Embedding Function"),
"embedding_fn",
Embeddings,
description=_(
"The embedding function of vector store, if not set, will use "
"the default embedding function."
),
optional=True,
default=None,
),
]
_COMMON_PARAMETERS = [
Parameter.build_from( Parameter.build_from(
_("User"), _("User"),
"user", "user",
@ -48,40 +62,6 @@ _COMMON_PARAMETERS = [
optional=True, optional=True,
default=None, default=None,
), ),
Parameter.build_from(
_("Embedding Function"),
"embedding_fn",
Embeddings,
description=_(
"The embedding function of vector store, if not set, will use "
"the default embedding function."
),
optional=True,
default=None,
),
Parameter.build_from(
_("Max Chunks Once Load"),
"max_chunks_once_load",
int,
description=_(
"The max number of chunks to load at once. If your document is "
"large, you can set this value to a larger number to speed up the loading "
"process. Default is 10."
),
optional=True,
default=10,
),
Parameter.build_from(
_("Max Threads"),
"max_threads",
int,
description=_(
"The max number of threads to use. Default is 1. If you set "
"this bigger than 1, please make sure your vector store is thread-safe."
),
optional=True,
default=1,
),
] ]

View File

@ -58,7 +58,7 @@ class KnowledgeGraphOperator(MapOperator[List[Chunk], List[Chunk]]):
"""Init the Knowledge Graph operator.""" """Init the Knowledge Graph operator."""
MapOperator.__init__(self, **kwargs) MapOperator.__init__(self, **kwargs)
self._graph_store = graph_store self._graph_store = graph_store
self._embeddings = graph_store.get_config().embedding_fn self._embeddings = graph_store.embeddings
self._max_chunks_once_load = max_chunks_once_load self._max_chunks_once_load = max_chunks_once_load
self.graph_store = graph_store self.graph_store = graph_store

View File

@ -84,7 +84,7 @@ class KnowledgeProcessBranchOperator(BranchOperator[Knowledge, Knowledge]):
async def check_graph_process(r: Knowledge) -> bool: async def check_graph_process(r: Knowledge) -> bool:
# If check graph is true, we will run extract knowledge graph triplets. # If check graph is true, we will run extract knowledge graph triplets.
from dbgpt.rag.operators import KnowledgeGraphOperator from dbgpt_ext.rag.operators import KnowledgeGraphOperator
if KnowledgeGraphOperator in download_cls_list: if KnowledgeGraphOperator in download_cls_list:
return True return True
@ -92,7 +92,7 @@ class KnowledgeProcessBranchOperator(BranchOperator[Knowledge, Knowledge]):
async def check_embedding_process(r: Knowledge) -> bool: async def check_embedding_process(r: Knowledge) -> bool:
# If check embedding is true, we will run extract document embedding. # If check embedding is true, we will run extract document embedding.
from dbgpt.rag.operators import VectorStorageOperator from dbgpt_ext.rag.operators import VectorStorageOperator
if VectorStorageOperator in download_cls_list: if VectorStorageOperator in download_cls_list:
return True return True
@ -100,7 +100,7 @@ class KnowledgeProcessBranchOperator(BranchOperator[Knowledge, Knowledge]):
async def check_full_text_process(r: Knowledge) -> bool: async def check_full_text_process(r: Knowledge) -> bool:
# If check full text is true, we will run extract document keywords. # If check full text is true, we will run extract document keywords.
from dbgpt.rag.operators.full_text import FullTextStorageOperator from dbgpt_ext.rag.operators.full_text import FullTextStorageOperator
if FullTextStorageOperator in download_cls_list: if FullTextStorageOperator in download_cls_list:
return True return True

View File

@ -58,7 +58,7 @@ class VectorStorageOperator(MapOperator[List[Chunk], List[Chunk]]):
"""Init the datasource operator.""" """Init the datasource operator."""
MapOperator.__init__(self, **kwargs) MapOperator.__init__(self, **kwargs)
self._vector_store = vector_store self._vector_store = vector_store
self._embeddings = vector_store.get_config().embedding_fn self._embeddings = vector_store.embeddings
self._max_chunks_once_load = max_chunks_once_load self._max_chunks_once_load = max_chunks_once_load
self.vector_store = vector_store self.vector_store = vector_store

View File

@ -7,13 +7,55 @@ import os
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List from typing import List
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig
from dbgpt.storage.graph_store.graph import GraphElemType from dbgpt.storage.graph_store.graph import GraphElemType
from dbgpt.util.i18n_utils import _
from dbgpt_ext.datasource.conn_tugraph import TuGraphConnector from dbgpt_ext.datasource.conn_tugraph import TuGraphConnector
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@register_resource(
_("TuGraph Graph Config"),
"tugraph_config",
category=ResourceCategory.KNOWLEDGE_GRAPH,
description=_("TuGraph config."),
parameters=[
Parameter.build_from(
_("host"),
"host",
str,
optional=True,
default="127.0.0.1",
description=_("TuGraph host"),
),
Parameter.build_from(
_("port"),
"port",
int,
optional=True,
default="7687",
description=_("TuGraph port"),
),
Parameter.build_from(
_("username"),
"username",
str,
optional=True,
default="admin",
description=_("TuGraph username"),
),
Parameter.build_from(
_("password"),
"password",
str,
optional=True,
default="73@TuGraph",
description=_("TuGraph password"),
),
],
)
@dataclass @dataclass
class TuGraphStoreConfig(GraphStoreConfig): class TuGraphStoreConfig(GraphStoreConfig):
"""TuGraph store config.""" """TuGraph store config."""

View File

@ -22,7 +22,6 @@ from dbgpt_ext.storage.knowledge_graph.community.community_store import Communit
from dbgpt_ext.storage.knowledge_graph.knowledge_graph import ( from dbgpt_ext.storage.knowledge_graph.knowledge_graph import (
GRAPH_PARAMETERS, GRAPH_PARAMETERS,
BuiltinKnowledgeGraph, BuiltinKnowledgeGraph,
BuiltinKnowledgeGraphConfig,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -146,10 +145,30 @@ logger = logging.getLogger(__name__)
description=_("Community Summary Knowledge Graph."), description=_("Community Summary Knowledge Graph."),
parameters=[ parameters=[
Parameter.build_from( Parameter.build_from(
_("Community Summary Knowledge Graph Config."), _("Graph Store Config"),
"config", "config",
BuiltinKnowledgeGraphConfig, GraphStoreConfig,
description=_("Community Summary Knowledge Graph Config."), description=_("graph store config."),
),
Parameter.build_from(
_("Graph Store Name"),
"name",
str,
optional=True,
default="dbgpt",
description=_("Graph Store Name"),
),
Parameter.build_from(
_("LLM Client"),
"llm_client",
LLMClient,
description=_("llm client for extract graph triplets."),
),
Parameter.build_from(
_("LLM Model Name"),
"llm_model",
str,
description=_("kg extract llm model name."),
optional=True, optional=True,
default=None, default=None,
), ),
@ -285,6 +304,11 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
"""Get the knowledge graph config.""" """Get the knowledge graph config."""
return self._config return self._config
@property
def embeddings(self) -> Embeddings:
    """Get the knowledge graph embeddings.

    Returns:
        The embedding function stored on this instance as ``_embedding_fn``.
    """
    return self._embedding_fn
async def aload_document(self, chunks: List[Chunk]) -> List[str]: async def aload_document(self, chunks: List[Chunk]) -> List[str]:
"""Extract and persist graph from the document file.""" """Extract and persist graph from the document file."""
if not self.vector_name_exists(): if not self.vector_name_exists():

View File

@ -3,15 +3,14 @@
import asyncio import asyncio
import logging import logging
import os import os
from dataclasses import dataclass, field from typing import Any, List, Optional
from typing import List, Optional
from dbgpt.core import Chunk, Embeddings, LLMClient from dbgpt.core import Chunk, Embeddings, LLMClient
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.rag.transformer.keyword_extractor import KeywordExtractor from dbgpt.rag.transformer.keyword_extractor import KeywordExtractor
from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig
from dbgpt.storage.graph_store.graph import Graph from dbgpt.storage.graph_store.graph import Graph
from dbgpt.storage.knowledge_graph.base import KnowledgeGraphBase, KnowledgeGraphConfig from dbgpt.storage.knowledge_graph.base import KnowledgeGraphBase
from dbgpt.storage.vector_store.filters import MetadataFilters from dbgpt.storage.vector_store.filters import MetadataFilters
from dbgpt.util.i18n_utils import _ from dbgpt.util.i18n_utils import _
from dbgpt_ext.rag.transformer.triplet_extractor import TripletExtractor from dbgpt_ext.rag.transformer.triplet_extractor import TripletExtractor
@ -68,50 +67,50 @@ GRAPH_PARAMETERS = [
] ]
@register_resource( # @register_resource(
_("Builtin Graph Config"), # _("Builtin Graph Config"),
"knowledge_graph_config", # "knowledge_graph_config",
category=ResourceCategory.KNOWLEDGE_GRAPH, # category=ResourceCategory.KNOWLEDGE_GRAPH,
description=_("knowledge graph config."), # description=_("knowledge graph config."),
parameters=[ # parameters=[
*GRAPH_PARAMETERS, # *GRAPH_PARAMETERS,
Parameter.build_from( # Parameter.build_from(
_("Knowledge Graph Type"), # _("Knowledge Graph Type"),
"graph_store_type", # "graph_store_type",
str, # str,
description=_("graph store type."), # description=_("graph store type."),
optional=True, # optional=True,
default="TuGraph", # default="TuGraph",
), # ),
Parameter.build_from( # Parameter.build_from(
_("LLM Client"), # _("LLM Client"),
"llm_client", # "llm_client",
LLMClient, # LLMClient,
description=_("llm client for extract graph triplets."), # description=_("llm client for extract graph triplets."),
), # ),
Parameter.build_from( # Parameter.build_from(
_("LLM Model Name"), # _("LLM Model Name"),
"model_name", # "model_name",
str, # str,
description=_("llm model name."), # description=_("llm model name."),
optional=True, # optional=True,
default=None, # default=None,
), # ),
], # ],
) # )
@dataclass # @dataclass
class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig): # class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
"""Builtin knowledge graph config.""" # """Builtin knowledge graph config."""
#
__type__ = "tugraph" # __type__ = "tugraph"
#
llm_model: Optional[str] = field( # llm_model: Optional[str] = field(
default=None, metadata={"description": "llm model name."} # default=None, metadata={"description": "llm model name."}
) # )
#
graph_type: Optional[str] = field( # graph_type: Optional[str] = field(
default="TuGraph", metadata={"description": "graph store type."} # default="TuGraph", metadata={"description": "graph store type."}
) # )
@register_resource( @register_resource(
@ -121,10 +120,30 @@ class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
description=_("Builtin Knowledge Graph."), description=_("Builtin Knowledge Graph."),
parameters=[ parameters=[
Parameter.build_from( Parameter.build_from(
_("Builtin Knowledge Graph Config."), _("Graph Store Config"),
"config", "config",
BuiltinKnowledgeGraphConfig, GraphStoreConfig,
description=_("Builtin Knowledge Graph Config."), description=_("graph store config."),
),
Parameter.build_from(
_("Graph Store Name"),
"name",
str,
optional=True,
default="dbgpt",
description=_("Graph Store Name"),
),
Parameter.build_from(
_("LLM Client"),
"llm_client",
LLMClient,
description=_("llm client for extract graph triplets."),
),
Parameter.build_from(
_("LLM Model Name"),
"llm_model",
str,
description=_("kg extract llm model name."),
optional=True, optional=True,
default=None, default=None,
), ),
@ -168,6 +187,11 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
"""Get the knowledge graph config.""" """Get the knowledge graph config."""
return self._config return self._config
@property
def embeddings(self) -> Any:
    """Get the knowledge graph embeddings.

    Returns:
        Always ``None`` in this implementation.
    """
    # NOTE(review): presumably this graph type has no embedding function of
    # its own, so callers reading ``.embeddings`` must handle ``None`` --
    # confirm against the operators that consume it.
    return None
def load_document(self, chunks: List[Chunk]) -> List[str]: def load_document(self, chunks: List[Chunk]) -> List[str]:
"""Extract and persist triplets to graph store.""" """Extract and persist triplets to graph store."""

View File

@ -12,6 +12,7 @@ from dbgpt.core import Chunk, Embeddings
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.storage.vector_store.base import ( from dbgpt.storage.vector_store.base import (
_COMMON_PARAMETERS, _COMMON_PARAMETERS,
_VECTOR_STORE_COMMON_PARAMETERS,
VectorStoreBase, VectorStoreBase,
VectorStoreConfig, VectorStoreConfig,
) )
@ -77,6 +78,7 @@ class ChromaVectorConfig(VectorStoreConfig):
optional=True, optional=True,
default=None, default=None,
), ),
*_VECTOR_STORE_COMMON_PARAMETERS,
], ],
) )
class ChromaStore(VectorStoreBase): class ChromaStore(VectorStoreBase):

View File

@ -11,6 +11,7 @@ from dbgpt.core import Chunk, Embeddings
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.storage.vector_store.base import ( from dbgpt.storage.vector_store.base import (
_COMMON_PARAMETERS, _COMMON_PARAMETERS,
_VECTOR_STORE_COMMON_PARAMETERS,
VectorStoreBase, VectorStoreBase,
VectorStoreConfig, VectorStoreConfig,
) )
@ -145,6 +146,7 @@ class ElasticsearchStoreConfig(VectorStoreConfig):
optional=True, optional=True,
default=None, default=None,
), ),
*_VECTOR_STORE_COMMON_PARAMETERS,
], ],
) )
class ElasticStore(VectorStoreBase): class ElasticStore(VectorStoreBase):

View File

@ -12,6 +12,7 @@ from dbgpt.core import Chunk, Embeddings
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.storage.vector_store.base import ( from dbgpt.storage.vector_store.base import (
_COMMON_PARAMETERS, _COMMON_PARAMETERS,
_VECTOR_STORE_COMMON_PARAMETERS,
VectorStoreBase, VectorStoreBase,
VectorStoreConfig, VectorStoreConfig,
) )
@ -185,6 +186,7 @@ class MilvusVectorConfig(VectorStoreConfig):
optional=True, optional=True,
default=None, default=None,
), ),
*_VECTOR_STORE_COMMON_PARAMETERS,
], ],
) )
class MilvusStore(VectorStoreBase): class MilvusStore(VectorStoreBase):

View File

@ -16,6 +16,7 @@ from dbgpt.core import Chunk, Embeddings
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.storage.vector_store.base import ( from dbgpt.storage.vector_store.base import (
_COMMON_PARAMETERS, _COMMON_PARAMETERS,
_VECTOR_STORE_COMMON_PARAMETERS,
VectorStoreBase, VectorStoreBase,
VectorStoreConfig, VectorStoreConfig,
) )
@ -180,6 +181,7 @@ class OceanBaseConfig(VectorStoreConfig):
optional=True, optional=True,
default=None, default=None,
), ),
*_VECTOR_STORE_COMMON_PARAMETERS,
], ],
) )
class OceanBaseStore(VectorStoreBase): class OceanBaseStore(VectorStoreBase):

View File

@ -8,6 +8,7 @@ from dbgpt.core import Chunk, Embeddings
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.storage.vector_store.base import ( from dbgpt.storage.vector_store.base import (
_COMMON_PARAMETERS, _COMMON_PARAMETERS,
_VECTOR_STORE_COMMON_PARAMETERS,
VectorStoreBase, VectorStoreBase,
VectorStoreConfig, VectorStoreConfig,
) )
@ -70,6 +71,7 @@ class PGVectorConfig(VectorStoreConfig):
optional=True, optional=True,
default=None, default=None,
), ),
*_VECTOR_STORE_COMMON_PARAMETERS,
], ],
) )
class PGVectorStore(VectorStoreBase): class PGVectorStore(VectorStoreBase):

View File

@ -9,6 +9,7 @@ from dbgpt.core import Chunk, Embeddings
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.storage.vector_store.base import ( from dbgpt.storage.vector_store.base import (
_COMMON_PARAMETERS, _COMMON_PARAMETERS,
_VECTOR_STORE_COMMON_PARAMETERS,
VectorStoreBase, VectorStoreBase,
VectorStoreConfig, VectorStoreConfig,
) )
@ -84,6 +85,7 @@ class WeaviateVectorConfig(VectorStoreConfig):
optional=True, optional=True,
default=None, default=None,
), ),
*_VECTOR_STORE_COMMON_PARAMETERS,
], ],
) )
class WeaviateStore(VectorStoreBase): class WeaviateStore(VectorStoreBase):