mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-06 19:40:13 +00:00
feat:Add Knowledge Process Workflow (#2210)
This commit is contained in:
@@ -6,22 +6,136 @@ import uuid
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from dbgpt._private.pydantic import ConfigDict, Field
|
||||
from dbgpt.core import Chunk
|
||||
from dbgpt.core import Chunk, LLMClient
|
||||
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
from dbgpt.rag.transformer.community_summarizer import CommunitySummarizer
|
||||
from dbgpt.rag.transformer.graph_extractor import GraphExtractor
|
||||
from dbgpt.storage.knowledge_graph.base import ParagraphChunk
|
||||
from dbgpt.storage.knowledge_graph.community.community_store import CommunityStore
|
||||
from dbgpt.storage.knowledge_graph.knowledge_graph import (
|
||||
GRAPH_PARAMETERS,
|
||||
BuiltinKnowledgeGraph,
|
||||
BuiltinKnowledgeGraphConfig,
|
||||
)
|
||||
from dbgpt.storage.vector_store.base import VectorStoreConfig
|
||||
from dbgpt.storage.vector_store.factory import VectorStoreFactory
|
||||
from dbgpt.storage.vector_store.filters import MetadataFilters
|
||||
from dbgpt.util.i18n_utils import _
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Register the community-summary knowledge graph config as a flow resource in
# the KNOWLEDGE_GRAPH category. The shared GRAPH_PARAMETERS come first,
# followed by the options that are declared only for this config.
@register_resource(
    _("Community Summary KG Config"),
    "community_summary_kg_config",
    category=ResourceCategory.KNOWLEDGE_GRAPH,
    description=_("community Summary kg Config."),
    parameters=[
        *GRAPH_PARAMETERS,
        Parameter.build_from(
            _("Knowledge Graph Type"),
            "graph_store_type",
            str,
            description=_("graph store type."),
            optional=True,
            default="TuGraph",
        ),
        # The only parameter declared without `optional`/`default`.
        Parameter.build_from(
            _("LLM Client"),
            "llm_client",
            LLMClient,
            description=_("llm client for extract graph triplets."),
        ),
        Parameter.build_from(
            _("LLM Model Name"),
            "model_name",
            str,
            description=_("llm model name."),
            optional=True,
            default=None,
        ),
        Parameter.build_from(
            _("Vector Store Type"),
            "vector_store_type",
            str,
            description=_("vector store type."),
            optional=True,
            default="Chroma",
        ),
        # Extraction search: result count and minimum score.
        Parameter.build_from(
            _("Topk of Knowledge Graph Extract"),
            "extract_topk",
            int,
            description=_("Topk of knowledge graph extract"),
            optional=True,
            default=5,
        ),
        Parameter.build_from(
            _("Recall Score of Knowledge Graph Extract"),
            "extract_score_threshold",
            float,
            description=_("Recall score of knowledge graph extract"),
            optional=True,
            default=0.3,
        ),
        # Community search: result count and minimum score. The top-k entry
        # is labelled distinctly so it cannot be confused with the score
        # threshold that follows it.
        Parameter.build_from(
            _("Topk of Community Search in Knowledge Graph"),
            "community_topk",
            int,
            description=_("Topk of community search in knowledge graph"),
            optional=True,
            default=50,
        ),
        Parameter.build_from(
            _("Recall Score of Community Search in Knowledge Graph"),
            "community_score_threshold",
            float,
            description=_("Recall score of community search in knowledge graph"),
            optional=True,
            default=0.0,
        ),
        # Two independent switches: one for triplets, one for
        # documents/chunks. Each carries its own label.
        Parameter.build_from(
            _("Enable the graph search for triplets"),
            "triplet_graph_enabled",
            bool,
            description=_("Enable the graph search for triplets"),
            optional=True,
            default=True,
        ),
        Parameter.build_from(
            _("Enable the graph search for documents and chunks"),
            "document_graph_enabled",
            bool,
            description=_("Enable the graph search for documents and chunks"),
            optional=True,
            default=True,
        ),
        Parameter.build_from(
            _("Top size of knowledge graph chunk search"),
            "knowledge_graph_chunk_search_top_size",
            int,
            description=_("Top size of knowledge graph chunk search"),
            optional=True,
            default=5,
        ),
        Parameter.build_from(
            _("Batch size of triplets extraction from the text"),
            "knowledge_graph_extraction_batch_size",
            int,
            description=_("Batch size of triplets extraction from the text"),
            optional=True,
            default=20,
        ),
        Parameter.build_from(
            _("Batch size of parallel community building process"),
            "community_summary_batch_size",
            int,
            description=_("Batch size of parallel community building process"),
            optional=True,
            default=20,
        ),
    ],
)
class CommunitySummaryKnowledgeGraphConfig(BuiltinKnowledgeGraphConfig):
    """Community summary knowledge graph config."""
|
||||
|
||||
@@ -80,6 +194,22 @@ class CommunitySummaryKnowledgeGraphConfig(BuiltinKnowledgeGraphConfig):
|
||||
)
|
||||
|
||||
|
||||
# Register the community-summary knowledge graph as a flow resource in the
# KNOWLEDGE_GRAPH category; it takes a single optional `config` parameter.
@register_resource(
    _("Community Summary Knowledge Graph"),
    "community_summary_knowledge_graph",
    category=ResourceCategory.KNOWLEDGE_GRAPH,
    description=_("Community Summary Knowledge Graph."),
    parameters=[
        # NOTE(review): `config` is typed as BuiltinKnowledgeGraphConfig even
        # though this module also registers
        # CommunitySummaryKnowledgeGraphConfig -- confirm that the base type
        # is what is intended here.
        Parameter.build_from(
            _("Community Summary Knowledge Graph Config."),
            "config",
            BuiltinKnowledgeGraphConfig,
            description=_("Community Summary Knowledge Graph Config."),
            optional=True,
            default=None,
        ),
    ],
)
class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
    """Community summary knowledge graph class."""
|
||||
|
||||
|
@@ -6,7 +6,8 @@ import os
|
||||
from typing import List, Optional
|
||||
|
||||
from dbgpt._private.pydantic import ConfigDict, Field
|
||||
from dbgpt.core import Chunk, LLMClient
|
||||
from dbgpt.core import Chunk, Embeddings, LLMClient
|
||||
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
from dbgpt.rag.transformer.keyword_extractor import KeywordExtractor
|
||||
from dbgpt.rag.transformer.triplet_extractor import TripletExtractor
|
||||
from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig
|
||||
@@ -16,10 +17,87 @@ from dbgpt.storage.knowledge_graph.base import KnowledgeGraphBase, KnowledgeGrap
|
||||
from dbgpt.storage.knowledge_graph.community.base import GraphStoreAdapter
|
||||
from dbgpt.storage.knowledge_graph.community.factory import GraphStoreAdapterFactory
|
||||
from dbgpt.storage.vector_store.filters import MetadataFilters
|
||||
from dbgpt.util.i18n_utils import _
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Parameter declarations shared by the knowledge-graph config resources;
# each registration unpacks this list at the head of its own `parameters`.
# All four entries are optional and carry a default.
GRAPH_PARAMETERS = [
    Parameter.build_from(
        _("Graph Name"),
        "name",
        str,
        description=_("The name of Graph, if not set, will use the default name."),
        optional=True,
        default="dbgpt_collection",
    ),
    Parameter.build_from(
        _("Embedding Function"),
        "embedding_fn",
        Embeddings,
        description=_(
            "The embedding function of vector store, if not set, will use "
            "the default embedding function."
        ),
        optional=True,
        default=None,
    ),
    Parameter.build_from(
        _("Max Chunks Once Load"),
        "max_chunks_once_load",
        int,
        description=_(
            "The max number of chunks to load at once. If your document is "
            "large, you can set this value to a larger number to speed up the loading "
            "process. Default is 10."
        ),
        optional=True,
        default=10,
    ),
    Parameter.build_from(
        _("Max Threads"),
        "max_threads",
        int,
        description=_(
            "The max number of threads to use. Default is 1. If you set "
            "this bigger than 1, please make sure your vector store is thread-safe."
        ),
        optional=True,
        default=1,
    ),
]
|
||||
|
||||
|
||||
# Register the builtin knowledge graph config as a flow resource in the
# KNOWLEDGE_GRAPH category: the shared GRAPH_PARAMETERS plus the graph store
# type, the LLM client and the model name.
@register_resource(
    _("Builtin Graph Config"),
    "knowledge_graph_config",
    category=ResourceCategory.KNOWLEDGE_GRAPH,
    description=_("knowledge graph config."),
    parameters=[
        *GRAPH_PARAMETERS,
        Parameter.build_from(
            _("Knowledge Graph Type"),
            "graph_store_type",
            str,
            description=_("graph store type."),
            optional=True,
            default="TuGraph",
        ),
        # The only parameter declared without `optional`/`default`.
        Parameter.build_from(
            _("LLM Client"),
            "llm_client",
            LLMClient,
            description=_("llm client for extract graph triplets."),
        ),
        Parameter.build_from(
            _("LLM Model Name"),
            "model_name",
            str,
            description=_("llm model name."),
            optional=True,
            default=None,
        ),
    ],
)
class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
    """Builtin knowledge graph config."""
|
||||
|
||||
@@ -34,6 +112,22 @@ class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
|
||||
)
|
||||
|
||||
|
||||
# Register the builtin knowledge graph as a flow resource in the
# KNOWLEDGE_GRAPH category; it takes a single optional `config` parameter
# typed as BuiltinKnowledgeGraphConfig.
@register_resource(
    _("Builtin Knowledge Graph"),
    "builtin_knowledge_graph",
    category=ResourceCategory.KNOWLEDGE_GRAPH,
    description=_("Builtin Knowledge Graph."),
    parameters=[
        Parameter.build_from(
            _("Builtin Knowledge Graph Config."),
            "config",
            BuiltinKnowledgeGraphConfig,
            description=_("Builtin Knowledge Graph Config."),
            optional=True,
            default=None,
        ),
    ],
)
class BuiltinKnowledgeGraph(KnowledgeGraphBase):
    """Builtin knowledge graph class."""
|
||||
|
||||
|
Reference in New Issue
Block a user