feat: Add Knowledge Process Workflow (#2210)

This commit is contained in:
Aries-ckt
2024-12-18 11:16:30 +08:00
committed by GitHub
parent 3745d7411d
commit b05febbf77
23 changed files with 7217 additions and 8 deletions

View File

@@ -6,22 +6,136 @@ import uuid
from typing import List, Optional, Tuple
from dbgpt._private.pydantic import ConfigDict, Field
from dbgpt.core import Chunk
from dbgpt.core import Chunk, LLMClient
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.rag.transformer.community_summarizer import CommunitySummarizer
from dbgpt.rag.transformer.graph_extractor import GraphExtractor
from dbgpt.storage.knowledge_graph.base import ParagraphChunk
from dbgpt.storage.knowledge_graph.community.community_store import CommunityStore
from dbgpt.storage.knowledge_graph.knowledge_graph import (
GRAPH_PARAMETERS,
BuiltinKnowledgeGraph,
BuiltinKnowledgeGraphConfig,
)
from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.storage.vector_store.factory import VectorStoreFactory
from dbgpt.storage.vector_store.filters import MetadataFilters
from dbgpt.util.i18n_utils import _
logger = logging.getLogger(__name__)
# Registers the community-summary KG config as an AWEL flow resource so it can
# be assembled from the visual workflow editor.  Builds on the shared
# GRAPH_PARAMETERS and adds extraction / community-search tuning knobs.
@register_resource(
    _("Community Summary KG Config"),
    "community_summary_kg_config",
    category=ResourceCategory.KNOWLEDGE_GRAPH,
    # FIX: normalized capitalization of the user-facing description.
    description=_("Community Summary KG Config."),
    parameters=[
        *GRAPH_PARAMETERS,
        Parameter.build_from(
            _("Knowledge Graph Type"),
            "graph_store_type",
            str,
            description=_("graph store type."),
            optional=True,
            default="TuGraph",
        ),
        Parameter.build_from(
            _("LLM Client"),
            "llm_client",
            LLMClient,
            # Required parameter: no ``optional``/``default`` given.
            description=_("llm client for extract graph triplets."),
        ),
        Parameter.build_from(
            _("LLM Model Name"),
            "model_name",
            str,
            description=_("llm model name."),
            optional=True,
            default=None,
        ),
        Parameter.build_from(
            _("Vector Store Type"),
            "vector_store_type",
            str,
            description=_("vector store type."),
            optional=True,
            default="Chroma",
        ),
        Parameter.build_from(
            _("Topk of Knowledge Graph Extract"),
            "extract_topk",
            int,
            description=_("Topk of knowledge graph extract"),
            optional=True,
            default=5,
        ),
        Parameter.build_from(
            _("Recall Score of Knowledge Graph Extract"),
            "extract_score_threshold",
            float,
            description=_("Recall score of knowledge graph extract"),
            optional=True,
            default=0.3,
        ),
        # FIX: label/description previously said "Recall Score of Community
        # Search" (copy-paste from community_score_threshold below), but this
        # int parameter is the top-k limit of community search.
        Parameter.build_from(
            _("Topk of Community Search in Knowledge Graph"),
            "community_topk",
            int,
            description=_("Topk of community search in knowledge graph"),
            optional=True,
            default=50,
        ),
        Parameter.build_from(
            _("Recall Score of Community Search in Knowledge Graph"),
            "community_score_threshold",
            float,
            description=_("Recall score of community search in knowledge graph"),
            optional=True,
            default=0.0,
        ),
        # FIX: label previously copied from document_graph_enabled; this flag
        # controls the triplet graph search (see its description).
        Parameter.build_from(
            _("Enable the graph search for triplets"),
            "triplet_graph_enabled",
            bool,
            description=_("Enable the graph search for triplets"),
            optional=True,
            default=True,
        ),
        Parameter.build_from(
            _("Enable the graph search for documents and chunks"),
            "document_graph_enabled",
            bool,
            description=_("Enable the graph search for documents and chunks"),
            optional=True,
            default=True,
        ),
        Parameter.build_from(
            _("Top size of knowledge graph chunk search"),
            "knowledge_graph_chunk_search_top_size",
            int,
            description=_("Top size of knowledge graph chunk search"),
            optional=True,
            default=5,
        ),
        Parameter.build_from(
            _("Batch size of triplets extraction from the text"),
            "knowledge_graph_extraction_batch_size",
            int,
            description=_("Batch size of triplets extraction from the text"),
            optional=True,
            default=20,
        ),
        Parameter.build_from(
            _("Batch size of parallel community building process"),
            "community_summary_batch_size",
            int,
            # FIX: typo "TBatch size" -> "Batch size".
            description=_("Batch size of parallel community building process"),
            optional=True,
            default=20,
        ),
    ],
)
class CommunitySummaryKnowledgeGraphConfig(BuiltinKnowledgeGraphConfig):
    """Community summary knowledge graph config."""
@@ -80,6 +194,22 @@ class CommunitySummaryKnowledgeGraphConfig(BuiltinKnowledgeGraphConfig):
)
# Registers ``CommunitySummaryKnowledgeGraph`` as an AWEL flow resource taking
# a single optional ``config`` object.
# NOTE(review): the ``config`` parameter is typed as the base
# ``BuiltinKnowledgeGraphConfig`` rather than
# ``CommunitySummaryKnowledgeGraphConfig`` — confirm this widening is intended.
@register_resource(
    _("Community Summary Knowledge Graph"),
    "community_summary_knowledge_graph",
    category=ResourceCategory.KNOWLEDGE_GRAPH,
    description=_("Community Summary Knowledge Graph."),
    parameters=[
        Parameter.build_from(
            _("Community Summary Knowledge Graph Config."),
            "config",
            BuiltinKnowledgeGraphConfig,
            description=_("Community Summary Knowledge Graph Config."),
            optional=True,
            default=None,  # presumably a default config is built when None — confirm in __init__
        ),
    ],
)
class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
    """Community summary knowledge graph class."""

View File

@@ -6,7 +6,8 @@ import os
from typing import List, Optional
from dbgpt._private.pydantic import ConfigDict, Field
from dbgpt.core import Chunk, LLMClient
from dbgpt.core import Chunk, Embeddings, LLMClient
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.rag.transformer.keyword_extractor import KeywordExtractor
from dbgpt.rag.transformer.triplet_extractor import TripletExtractor
from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig
@@ -16,10 +17,87 @@ from dbgpt.storage.knowledge_graph.base import KnowledgeGraphBase, KnowledgeGrap
from dbgpt.storage.knowledge_graph.community.base import GraphStoreAdapter
from dbgpt.storage.knowledge_graph.community.factory import GraphStoreAdapterFactory
from dbgpt.storage.vector_store.filters import MetadataFilters
from dbgpt.util.i18n_utils import _
logger = logging.getLogger(__name__)
# Shared AWEL flow parameters common to graph-store backed knowledge graph
# configurations; spliced (``*GRAPH_PARAMETERS``) into the parameter lists of
# the registered config resources in this package.
GRAPH_PARAMETERS = [
    Parameter.build_from(
        _("Graph Name"),
        "name",
        str,
        description=_("The name of Graph, if not set, will use the default name."),
        optional=True,
        default="dbgpt_collection",
    ),
    Parameter.build_from(
        _("Embedding Function"),
        "embedding_fn",
        Embeddings,
        description=_(
            "The embedding function of vector store, if not set, will use "
            "the default embedding function."
        ),
        optional=True,
        default=None,
    ),
    Parameter.build_from(
        _("Max Chunks Once Load"),
        "max_chunks_once_load",
        int,
        description=_(
            "The max number of chunks to load at once. If your document is "
            "large, you can set this value to a larger number to speed up the loading "
            "process. Default is 10."
        ),
        optional=True,
        default=10,
    ),
    Parameter.build_from(
        _("Max Threads"),
        "max_threads",
        int,
        description=_(
            "The max number of threads to use. Default is 1. If you set "
            "this bigger than 1, please make sure your vector store is thread-safe."
        ),
        optional=True,
        default=1,
    ),
]
# Registers ``BuiltinKnowledgeGraphConfig`` as an AWEL flow resource so it can
# be configured from the visual workflow editor; extends the shared
# GRAPH_PARAMETERS with graph-store and LLM options.
@register_resource(
    _("Builtin Graph Config"),
    "knowledge_graph_config",
    category=ResourceCategory.KNOWLEDGE_GRAPH,
    description=_("knowledge graph config."),
    parameters=[
        *GRAPH_PARAMETERS,
        Parameter.build_from(
            _("Knowledge Graph Type"),
            "graph_store_type",
            str,
            description=_("graph store type."),
            optional=True,
            default="TuGraph",
        ),
        Parameter.build_from(
            _("LLM Client"),
            "llm_client",
            LLMClient,
            # Required parameter: no ``optional``/``default`` given.
            description=_("llm client for extract graph triplets."),
        ),
        Parameter.build_from(
            _("LLM Model Name"),
            "model_name",
            str,
            description=_("llm model name."),
            optional=True,
            default=None,
        ),
    ],
)
class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
    """Builtin knowledge graph config."""
@@ -34,6 +112,22 @@ class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
)
# Registers ``BuiltinKnowledgeGraph`` as an AWEL flow resource taking a single
# optional ``config`` object.
@register_resource(
    _("Builtin Knowledge Graph"),
    "builtin_knowledge_graph",
    category=ResourceCategory.KNOWLEDGE_GRAPH,
    description=_("Builtin Knowledge Graph."),
    parameters=[
        Parameter.build_from(
            _("Builtin Knowledge Graph Config."),
            "config",
            BuiltinKnowledgeGraphConfig,
            description=_("Builtin Knowledge Graph Config."),
            optional=True,
            default=None,  # presumably a default config is built when None — confirm in __init__
        ),
    ],
)
class BuiltinKnowledgeGraph(KnowledgeGraphBase):
    """Builtin knowledge graph class."""