mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-21 09:43:11 +00:00
fix:rag workflow update
This commit is contained in:
parent
3fa7bee289
commit
2107f472e1
@ -7,7 +7,7 @@ from typing import List, Optional
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from dbgpt.core import Chunk
|
||||
from dbgpt.core import Chunk, Embeddings
|
||||
from dbgpt.storage.base import IndexStoreBase, IndexStoreConfig
|
||||
from dbgpt.storage.graph_store.graph import Graph
|
||||
from dbgpt.util import RegisterParameters
|
||||
@ -29,6 +29,11 @@ class KnowledgeGraphBase(IndexStoreBase, ABC):
|
||||
def get_config(self) -> KnowledgeGraphConfig:
|
||||
"""Get the knowledge graph config."""
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Embeddings:
|
||||
"""Get the knowledge graph embeddings."""
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def query_graph(self, limit: Optional[int] = None) -> Graph:
|
||||
"""Get graph data."""
|
||||
|
@ -17,7 +17,7 @@ from dbgpt.util.i18n_utils import _
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_COMMON_PARAMETERS = [
|
||||
_VECTOR_STORE_COMMON_PARAMETERS = [
|
||||
Parameter.build_from(
|
||||
_("Collection Name"),
|
||||
"name",
|
||||
@ -28,6 +28,20 @@ _COMMON_PARAMETERS = [
|
||||
optional=True,
|
||||
default="dbgpt_collection",
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("Embedding Function"),
|
||||
"embedding_fn",
|
||||
Embeddings,
|
||||
description=_(
|
||||
"The embedding function of vector store, if not set, will use "
|
||||
"the default embedding function."
|
||||
),
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
]
|
||||
|
||||
_COMMON_PARAMETERS = [
|
||||
Parameter.build_from(
|
||||
_("User"),
|
||||
"user",
|
||||
@ -48,40 +62,6 @@ _COMMON_PARAMETERS = [
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("Embedding Function"),
|
||||
"embedding_fn",
|
||||
Embeddings,
|
||||
description=_(
|
||||
"The embedding function of vector store, if not set, will use "
|
||||
"the default embedding function."
|
||||
),
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("Max Chunks Once Load"),
|
||||
"max_chunks_once_load",
|
||||
int,
|
||||
description=_(
|
||||
"The max number of chunks to load at once. If your document is "
|
||||
"large, you can set this value to a larger number to speed up the loading "
|
||||
"process. Default is 10."
|
||||
),
|
||||
optional=True,
|
||||
default=10,
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("Max Threads"),
|
||||
"max_threads",
|
||||
int,
|
||||
description=_(
|
||||
"The max number of threads to use. Default is 1. If you set "
|
||||
"this bigger than 1, please make sure your vector store is thread-safe."
|
||||
),
|
||||
optional=True,
|
||||
default=1,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
@ -58,7 +58,7 @@ class KnowledgeGraphOperator(MapOperator[List[Chunk], List[Chunk]]):
|
||||
"""Init the Knowledge Graph operator."""
|
||||
MapOperator.__init__(self, **kwargs)
|
||||
self._graph_store = graph_store
|
||||
self._embeddings = graph_store.get_config().embedding_fn
|
||||
self._embeddings = graph_store.embeddings
|
||||
self._max_chunks_once_load = max_chunks_once_load
|
||||
self.graph_store = graph_store
|
||||
|
||||
|
@ -84,7 +84,7 @@ class KnowledgeProcessBranchOperator(BranchOperator[Knowledge, Knowledge]):
|
||||
|
||||
async def check_graph_process(r: Knowledge) -> bool:
|
||||
# If check graph is true, we will run extract knowledge graph triplets.
|
||||
from dbgpt.rag.operators import KnowledgeGraphOperator
|
||||
from dbgpt_ext.rag.operators import KnowledgeGraphOperator
|
||||
|
||||
if KnowledgeGraphOperator in download_cls_list:
|
||||
return True
|
||||
@ -92,7 +92,7 @@ class KnowledgeProcessBranchOperator(BranchOperator[Knowledge, Knowledge]):
|
||||
|
||||
async def check_embedding_process(r: Knowledge) -> bool:
|
||||
# If check embedding is true, we will run extract document embedding.
|
||||
from dbgpt.rag.operators import VectorStorageOperator
|
||||
from dbgpt_ext.rag.operators import VectorStorageOperator
|
||||
|
||||
if VectorStorageOperator in download_cls_list:
|
||||
return True
|
||||
@ -100,7 +100,7 @@ class KnowledgeProcessBranchOperator(BranchOperator[Knowledge, Knowledge]):
|
||||
|
||||
async def check_full_text_process(r: Knowledge) -> bool:
|
||||
# If check full text is true, we will run extract document keywords.
|
||||
from dbgpt.rag.operators.full_text import FullTextStorageOperator
|
||||
from dbgpt_ext.rag.operators.full_text import FullTextStorageOperator
|
||||
|
||||
if FullTextStorageOperator in download_cls_list:
|
||||
return True
|
||||
|
@ -58,7 +58,7 @@ class VectorStorageOperator(MapOperator[List[Chunk], List[Chunk]]):
|
||||
"""Init the vector storage operator."""
|
||||
MapOperator.__init__(self, **kwargs)
|
||||
self._vector_store = vector_store
|
||||
self._embeddings = vector_store.get_config().embedding_fn
|
||||
self._embeddings = vector_store.embeddings
|
||||
self._max_chunks_once_load = max_chunks_once_load
|
||||
self.vector_store = vector_store
|
||||
|
||||
|
@ -7,13 +7,55 @@ import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List
|
||||
|
||||
from dbgpt.core.awel.flow import register_resource, ResourceCategory, Parameter
|
||||
from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig
|
||||
from dbgpt.storage.graph_store.graph import GraphElemType
|
||||
from dbgpt_ext.datasource.conn_tugraph import TuGraphConnector
|
||||
from dbgpt.util.i18n_utils import _
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@register_resource(
|
||||
_("TuGraph Graph Config"),
|
||||
"tugraph_config",
|
||||
category=ResourceCategory.KNOWLEDGE_GRAPH,
|
||||
description=_("TuGraph config."),
|
||||
parameters=[
|
||||
Parameter.build_from(
|
||||
_("host"),
|
||||
"host",
|
||||
str,
|
||||
optional=True,
|
||||
default="127.0.0.1",
|
||||
description=_("TuGraph host"),
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("port"),
|
||||
"port",
|
||||
int,
|
||||
optional=True,
|
||||
default="7687",
|
||||
description=_("TuGraph port"),
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("username"),
|
||||
"username",
|
||||
str,
|
||||
optional=True,
|
||||
default="admin",
|
||||
description=_("TuGraph username"),
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("password"),
|
||||
"password",
|
||||
str,
|
||||
optional=True,
|
||||
default="73@TuGraph",
|
||||
description=_("TuGraph password"),
|
||||
),
|
||||
],
|
||||
)
|
||||
@dataclass
|
||||
class TuGraphStoreConfig(GraphStoreConfig):
|
||||
"""TuGraph store config."""
|
||||
|
@ -285,6 +285,12 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
|
||||
"""Get the knowledge graph config."""
|
||||
return self._config
|
||||
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Embeddings:
|
||||
"""Get the knowledge graph embeddings."""
|
||||
return self._embedding_fn
|
||||
|
||||
async def aload_document(self, chunks: List[Chunk]) -> List[str]:
|
||||
"""Extract and persist graph from the document file."""
|
||||
if not self.vector_name_exists():
|
||||
|
@ -4,7 +4,7 @@ import asyncio
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Any
|
||||
|
||||
from dbgpt.core import Chunk, Embeddings, LLMClient
|
||||
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
@ -68,50 +68,50 @@ GRAPH_PARAMETERS = [
|
||||
]
|
||||
|
||||
|
||||
@register_resource(
|
||||
_("Builtin Graph Config"),
|
||||
"knowledge_graph_config",
|
||||
category=ResourceCategory.KNOWLEDGE_GRAPH,
|
||||
description=_("knowledge graph config."),
|
||||
parameters=[
|
||||
*GRAPH_PARAMETERS,
|
||||
Parameter.build_from(
|
||||
_("Knowledge Graph Type"),
|
||||
"graph_store_type",
|
||||
str,
|
||||
description=_("graph store type."),
|
||||
optional=True,
|
||||
default="TuGraph",
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("LLM Client"),
|
||||
"llm_client",
|
||||
LLMClient,
|
||||
description=_("llm client for extract graph triplets."),
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("LLM Model Name"),
|
||||
"model_name",
|
||||
str,
|
||||
description=_("llm model name."),
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
],
|
||||
)
|
||||
@dataclass
|
||||
class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
|
||||
"""Builtin knowledge graph config."""
|
||||
|
||||
__type__ = "tugraph"
|
||||
|
||||
llm_model: Optional[str] = field(
|
||||
default=None, metadata={"description": "llm model name."}
|
||||
)
|
||||
|
||||
graph_type: Optional[str] = field(
|
||||
default="TuGraph", metadata={"description": "graph store type."}
|
||||
)
|
||||
# @register_resource(
|
||||
# _("Builtin Graph Config"),
|
||||
# "knowledge_graph_config",
|
||||
# category=ResourceCategory.KNOWLEDGE_GRAPH,
|
||||
# description=_("knowledge graph config."),
|
||||
# parameters=[
|
||||
# *GRAPH_PARAMETERS,
|
||||
# Parameter.build_from(
|
||||
# _("Knowledge Graph Type"),
|
||||
# "graph_store_type",
|
||||
# str,
|
||||
# description=_("graph store type."),
|
||||
# optional=True,
|
||||
# default="TuGraph",
|
||||
# ),
|
||||
# Parameter.build_from(
|
||||
# _("LLM Client"),
|
||||
# "llm_client",
|
||||
# LLMClient,
|
||||
# description=_("llm client for extract graph triplets."),
|
||||
# ),
|
||||
# Parameter.build_from(
|
||||
# _("LLM Model Name"),
|
||||
# "model_name",
|
||||
# str,
|
||||
# description=_("llm model name."),
|
||||
# optional=True,
|
||||
# default=None,
|
||||
# ),
|
||||
# ],
|
||||
# )
|
||||
# @dataclass
|
||||
# class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
|
||||
# """Builtin knowledge graph config."""
|
||||
#
|
||||
# __type__ = "tugraph"
|
||||
#
|
||||
# llm_model: Optional[str] = field(
|
||||
# default=None, metadata={"description": "llm model name."}
|
||||
# )
|
||||
#
|
||||
# graph_type: Optional[str] = field(
|
||||
# default="TuGraph", metadata={"description": "graph store type."}
|
||||
# )
|
||||
|
||||
|
||||
@register_resource(
|
||||
@ -121,13 +121,34 @@ class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
|
||||
description=_("Builtin Knowledge Graph."),
|
||||
parameters=[
|
||||
Parameter.build_from(
|
||||
_("Builtin Knowledge Graph Config."),
|
||||
_("Graph Store Config"),
|
||||
"config",
|
||||
BuiltinKnowledgeGraphConfig,
|
||||
description=_("Builtin Knowledge Graph Config."),
|
||||
GraphStoreConfig,
|
||||
description=_("graph store config."),
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("Graph Store Name"),
|
||||
"name",
|
||||
str,
|
||||
optional=True,
|
||||
default="dbgpt",
|
||||
description=_("Graph Store Name"),
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("LLM Client"),
|
||||
"llm_client",
|
||||
LLMClient,
|
||||
description=_("llm client for extract graph triplets."),
|
||||
),
|
||||
Parameter.build_from(
|
||||
_("LLM Model Name"),
|
||||
"llm_model",
|
||||
str,
|
||||
description=_("kg extract llm model name."),
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
|
||||
],
|
||||
)
|
||||
class BuiltinKnowledgeGraph(KnowledgeGraphBase):
|
||||
@ -168,6 +189,11 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
|
||||
"""Get the knowledge graph config."""
|
||||
return self._config
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Any:
|
||||
"""Get the knowledge graph embeddings (always None for this graph)."""
|
||||
return None
|
||||
|
||||
def load_document(self, chunks: List[Chunk]) -> List[str]:
|
||||
"""Extract and persist triplets to graph store."""
|
||||
|
||||
|
@ -13,7 +13,7 @@ from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
from dbgpt.storage.vector_store.base import (
|
||||
_COMMON_PARAMETERS,
|
||||
VectorStoreBase,
|
||||
VectorStoreConfig,
|
||||
VectorStoreConfig, _VECTOR_STORE_COMMON_PARAMETERS,
|
||||
)
|
||||
from dbgpt.storage.vector_store.filters import FilterOperator, MetadataFilters
|
||||
from dbgpt.util import string_utils
|
||||
@ -77,6 +77,7 @@ class ChromaVectorConfig(VectorStoreConfig):
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
*_VECTOR_STORE_COMMON_PARAMETERS,
|
||||
],
|
||||
)
|
||||
class ChromaStore(VectorStoreBase):
|
||||
|
@ -12,7 +12,7 @@ from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
from dbgpt.storage.vector_store.base import (
|
||||
_COMMON_PARAMETERS,
|
||||
VectorStoreBase,
|
||||
VectorStoreConfig,
|
||||
VectorStoreConfig, _VECTOR_STORE_COMMON_PARAMETERS,
|
||||
)
|
||||
from dbgpt.storage.vector_store.filters import MetadataFilters
|
||||
from dbgpt.util import string_utils
|
||||
@ -145,6 +145,7 @@ class ElasticsearchStoreConfig(VectorStoreConfig):
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
*_VECTOR_STORE_COMMON_PARAMETERS,
|
||||
],
|
||||
)
|
||||
class ElasticStore(VectorStoreBase):
|
||||
|
@ -13,7 +13,7 @@ from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
from dbgpt.storage.vector_store.base import (
|
||||
_COMMON_PARAMETERS,
|
||||
VectorStoreBase,
|
||||
VectorStoreConfig,
|
||||
VectorStoreConfig, _VECTOR_STORE_COMMON_PARAMETERS,
|
||||
)
|
||||
from dbgpt.storage.vector_store.filters import FilterOperator, MetadataFilters
|
||||
from dbgpt.util import string_utils
|
||||
@ -185,6 +185,7 @@ class MilvusVectorConfig(VectorStoreConfig):
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
*_VECTOR_STORE_COMMON_PARAMETERS,
|
||||
],
|
||||
)
|
||||
class MilvusStore(VectorStoreBase):
|
||||
@ -561,7 +562,7 @@ class MilvusStore(VectorStoreBase):
|
||||
# use default index params.
|
||||
if param is None:
|
||||
index_type = self.col.indexes[0].params["index_type"]
|
||||
param = self.index_params_map[index_type].get("params")
|
||||
param = self.index_params_map[index_type]
|
||||
# query text embedding.
|
||||
query_vector = self.embedding.embed_query(query)
|
||||
# Determine result metadata fields.
|
||||
|
@ -17,7 +17,7 @@ from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
from dbgpt.storage.vector_store.base import (
|
||||
_COMMON_PARAMETERS,
|
||||
VectorStoreBase,
|
||||
VectorStoreConfig,
|
||||
VectorStoreConfig, _VECTOR_STORE_COMMON_PARAMETERS,
|
||||
)
|
||||
from dbgpt.storage.vector_store.filters import FilterOperator, MetadataFilters
|
||||
from dbgpt.util.i18n_utils import _
|
||||
@ -180,6 +180,7 @@ class OceanBaseConfig(VectorStoreConfig):
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
*_VECTOR_STORE_COMMON_PARAMETERS,
|
||||
],
|
||||
)
|
||||
class OceanBaseStore(VectorStoreBase):
|
||||
|
@ -9,7 +9,7 @@ from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
from dbgpt.storage.vector_store.base import (
|
||||
_COMMON_PARAMETERS,
|
||||
VectorStoreBase,
|
||||
VectorStoreConfig,
|
||||
VectorStoreConfig, _VECTOR_STORE_COMMON_PARAMETERS,
|
||||
)
|
||||
from dbgpt.storage.vector_store.filters import MetadataFilters
|
||||
from dbgpt.util.i18n_utils import _
|
||||
@ -70,6 +70,7 @@ class PGVectorConfig(VectorStoreConfig):
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
*_VECTOR_STORE_COMMON_PARAMETERS,
|
||||
],
|
||||
)
|
||||
class PGVectorStore(VectorStoreBase):
|
||||
|
@ -10,7 +10,7 @@ from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
|
||||
from dbgpt.storage.vector_store.base import (
|
||||
_COMMON_PARAMETERS,
|
||||
VectorStoreBase,
|
||||
VectorStoreConfig,
|
||||
VectorStoreConfig, _VECTOR_STORE_COMMON_PARAMETERS,
|
||||
)
|
||||
from dbgpt.storage.vector_store.filters import MetadataFilters
|
||||
from dbgpt.util.i18n_utils import _
|
||||
@ -84,6 +84,7 @@ class WeaviateVectorConfig(VectorStoreConfig):
|
||||
optional=True,
|
||||
default=None,
|
||||
),
|
||||
*_VECTOR_STORE_COMMON_PARAMETERS,
|
||||
],
|
||||
)
|
||||
class WeaviateStore(VectorStoreBase):
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user