mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-11 13:58:58 +00:00
refactor:adapt rag storage and add integration documents. (#2361)
This commit is contained in:
@@ -70,6 +70,8 @@ datasource_duckdb = [
|
||||
storage_milvus = ["pymilvus"]
|
||||
storage_weaviate = ["weaviate-client"]
|
||||
storage_chromadb = ["chromadb>=0.4.22"]
|
||||
storage_elasticsearch = ["elasticsearch"]
|
||||
storage_obvector = ["pyobvector"]
|
||||
|
||||
[tool.uv]
|
||||
managed = true
|
||||
|
@@ -1 +1,131 @@
|
||||
"""Module of storage."""
|
||||
"""Module of RAG storage."""
|
||||
|
||||
from typing import Tuple, Type
|
||||
|
||||
|
||||
def _import_pgvector() -> Tuple[Type, Type]:
    """Load the PGVector backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(PGVectorStore, PGVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.pgvector_store import (
        PGVectorConfig as config_cls,
        PGVectorStore as store_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_milvus() -> Tuple[Type, Type]:
    """Load the Milvus backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(MilvusStore, MilvusVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.milvus_store import (
        MilvusStore as store_cls,
        MilvusVectorConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_chroma() -> Tuple[Type, Type]:
    """Load the Chroma backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(ChromaStore, ChromaVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.chroma_store import (
        ChromaStore as store_cls,
        ChromaVectorConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_weaviate() -> Tuple[Type, Type]:
    """Load the Weaviate backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(WeaviateStore, WeaviateVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.weaviate_store import (
        WeaviateStore as store_cls,
        WeaviateVectorConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_oceanbase() -> Tuple[Type, Type]:
    """Load the OceanBase backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(OceanBaseStore, OceanBaseConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.oceanbase_store import (
        OceanBaseConfig as config_cls,
        OceanBaseStore as store_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_elastic() -> Tuple[Type, Type]:
    """Load the Elasticsearch vector backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(ElasticStore, ElasticsearchVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.elastic_store import (
        ElasticsearchVectorConfig as config_cls,
        ElasticStore as store_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_builtin_knowledge_graph() -> Tuple[Type, Type]:
    """Load the builtin knowledge-graph backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(BuiltinKnowledgeGraph, BuiltinKnowledgeGraphConfig)``,
        store first.
    """
    from dbgpt_ext.storage.knowledge_graph.knowledge_graph import (
        BuiltinKnowledgeGraph as store_cls,
        BuiltinKnowledgeGraphConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_community_summary_knowledge_graph() -> Tuple[Type, Type]:
    """Load the community-summary knowledge-graph backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(CommunitySummaryKnowledgeGraph,
        CommunitySummaryKnowledgeGraphConfig)``, store first.
    """
    from dbgpt_ext.storage.knowledge_graph.community_summary import (
        CommunitySummaryKnowledgeGraph as store_cls,
        CommunitySummaryKnowledgeGraphConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_openspg() -> Tuple[Type, Type]:
    """Load the OpenSPG knowledge-graph backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(OpenSPG, OpenSPGConfig)``, store first.
    """
    from dbgpt_ext.storage.knowledge_graph.open_spg import (
        OpenSPG as store_cls,
        OpenSPGConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_full_text() -> Tuple[Type, Type]:
    """Load the Elasticsearch full-text document backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(ElasticDocumentStore, ElasticDocumentConfig)``, store
        first.
    """
    from dbgpt_ext.storage.full_text.elasticsearch import (
        ElasticDocumentConfig as config_cls,
        ElasticDocumentStore as store_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _select_rag_storage(name: str) -> Tuple[Type, Type]:
    """Resolve a RAG storage backend name to its ``(store, config)`` pair.

    Args:
        name: Backend name, compared case-sensitively with ``==``
            (for example ``"Chroma"`` or ``"FullText"``).

    Returns:
        Whatever the matching ``_import_*`` loader returns.

    Raises:
        AttributeError: If ``name`` matches none of the known backends.
    """
    # Loaders are stored uncalled so that only the requested backend is
    # imported; the order mirrors the lookup priority.
    loaders = (
        ("Chroma", _import_chroma),
        ("Milvus", _import_milvus),
        ("Weaviate", _import_weaviate),
        ("PGVector", _import_pgvector),
        ("OceanBase", _import_oceanbase),
        ("ElasticSearch", _import_elastic),
        ("KnowledgeGraph", _import_builtin_knowledge_graph),
        (
            "CommunitySummaryKnowledgeGraph",
            _import_community_summary_knowledge_graph,
        ),
        ("OpenSPG", _import_openspg),
        ("FullText", _import_full_text),
    )
    for backend, loader in loaders:
        if name == backend:
            return loader()
    raise AttributeError(f"Could not find: {name}")
|
||||
|
||||
|
||||
# Backend names grouped by storage kind. Every name listed here has a
# matching branch in ``_select_rag_storage``.
__vector_store__ = [
    "Chroma",
    "Milvus",
    "Weaviate",
    "OceanBase",
    "PGVector",
    "ElasticSearch",
]

__knowledge_graph__ = [
    "KnowledgeGraph",
    "CommunitySummaryKnowledgeGraph",
    "OpenSPG",
]

__document_store__ = ["FullText"]

# NOTE(review): no module attribute with any of these names is visible in
# this file, so a star-import would presumably fail -- confirm ``__all__`` is
# only consumed as a registry of backend names.
__all__ = [*__vector_store__, *__knowledge_graph__, *__document_store__]
|
||||
|
@@ -1,7 +1,7 @@
|
||||
"""Graph store factory."""
|
||||
|
||||
import logging
|
||||
from typing import Tuple, Type
|
||||
from typing import Optional, Tuple, Type
|
||||
|
||||
from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig
|
||||
from dbgpt_ext.storage import graph_store
|
||||
@@ -13,7 +13,11 @@ class GraphStoreFactory:
|
||||
"""Factory for graph store."""
|
||||
|
||||
@staticmethod
|
||||
def create(graph_store_type: str, graph_store_configure=None) -> GraphStoreBase:
|
||||
def create(
|
||||
graph_store_type: str,
|
||||
graph_store_configure=None,
|
||||
graph_config: Optional[dict] = None,
|
||||
) -> GraphStoreBase:
|
||||
"""Create a GraphStore instance.
|
||||
|
||||
Args:
|
||||
@@ -23,7 +27,10 @@ class GraphStoreFactory:
|
||||
store_cls, cfg_cls = GraphStoreFactory.__find_type(graph_store_type)
|
||||
|
||||
try:
|
||||
config = cfg_cls()
|
||||
if graph_config:
|
||||
config = cfg_cls(**graph_config)
|
||||
else:
|
||||
config = cfg_cls()
|
||||
if graph_store_configure:
|
||||
graph_store_configure(config)
|
||||
return store_cls(config)
|
||||
|
@@ -83,23 +83,18 @@ class TuGraphStore(GraphStoreBase):
|
||||
def __init__(self, config: TuGraphStoreConfig) -> None:
|
||||
"""Initialize the TuGraphStore with connection details."""
|
||||
self._config = config
|
||||
self._host = os.getenv("TUGRAPH_HOST", config.host)
|
||||
self._port = int(os.getenv("TUGRAPH_PORT", config.port))
|
||||
self._username = os.getenv("TUGRAPH_USERNAME", config.username)
|
||||
self._password = os.getenv("TUGRAPH_PASSWORD", config.password)
|
||||
self.enable_summary = (
|
||||
self._host = config.host or os.getenv("TUGRAPH_HOST")
|
||||
self._port = int(config.port or os.getenv("TUGRAPH_PORT"))
|
||||
self._username = config.username or os.getenv("TUGRAPH_USERNAME")
|
||||
self._password = config.password or os.getenv("TUGRAPH_PASSWORD")
|
||||
self.enable_summary = config.enable_summary or (
|
||||
os.getenv("GRAPH_COMMUNITY_SUMMARY_ENABLED", "").lower() == "true"
|
||||
if "GRAPH_COMMUNITY_SUMMARY_ENABLED" in os.environ
|
||||
else config.enable_summary
|
||||
)
|
||||
self.enable_similarity_search = (
|
||||
os.environ["SIMILARITY_SEARCH_ENABLED"].lower() == "true"
|
||||
if "SIMILARITY_SEARCH_ENABLED" in os.environ
|
||||
else config.enable_similarity_search
|
||||
self.enable_similarity_search = config.enable_similarity_search or (
|
||||
os.getenv("SIMILARITY_SEARCH_ENABLED", "").lower() == "true"
|
||||
)
|
||||
self._plugin_names = (
|
||||
self._plugin_names = config.plugin_names or (
|
||||
os.getenv("TUGRAPH_PLUGIN_NAMES", "leiden").split(",")
|
||||
or config.plugin_names
|
||||
)
|
||||
|
||||
self._graph_name = config.name
|
||||
|
@@ -107,9 +107,7 @@ class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
|
||||
|
||||
model_name: str = Field(default=None, description="The name of llm model.")
|
||||
|
||||
graph_store_type: str = Field(
|
||||
default="TuGraph", description="The type of graph store."
|
||||
)
|
||||
type: str = Field(default="TuGraph", description="The type of graph store.")
|
||||
|
||||
|
||||
@register_resource(
|
||||
@@ -151,8 +149,8 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
|
||||
cfg.name = config.name
|
||||
cfg.embedding_fn = config.embedding_fn
|
||||
|
||||
graph_store_type = os.getenv("GRAPH_STORE_TYPE") or config.graph_store_type
|
||||
return GraphStoreFactory.create(graph_store_type, configure)
|
||||
graph_store_type = os.getenv("GRAPH_STORE_TYPE") or config.type
|
||||
return GraphStoreFactory.create(graph_store_type, configure, config.dict())
|
||||
|
||||
def __init_graph_store_adapter(self):
|
||||
return GraphStoreAdapterFactory.create(self._graph_store)
|
||||
|
@@ -1,131 +0,0 @@
|
||||
"""Vector Store Module."""
|
||||
|
||||
from typing import Tuple, Type
|
||||
|
||||
|
||||
def _import_pgvector() -> Tuple[Type, Type]:
    """Load the PGVector backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(PGVectorStore, PGVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.pgvector_store import (
        PGVectorConfig as config_cls,
        PGVectorStore as store_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_milvus() -> Tuple[Type, Type]:
    """Load the Milvus backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(MilvusStore, MilvusVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.milvus_store import (
        MilvusStore as store_cls,
        MilvusVectorConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_chroma() -> Tuple[Type, Type]:
    """Load the Chroma backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(ChromaStore, ChromaVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.chroma_store import (
        ChromaStore as store_cls,
        ChromaVectorConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_weaviate() -> Tuple[Type, Type]:
    """Load the Weaviate backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(WeaviateStore, WeaviateVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.weaviate_store import (
        WeaviateStore as store_cls,
        WeaviateVectorConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_oceanbase() -> Tuple[Type, Type]:
    """Load the OceanBase backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(OceanBaseStore, OceanBaseConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.oceanbase_store import (
        OceanBaseConfig as config_cls,
        OceanBaseStore as store_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_elastic() -> Tuple[Type, Type]:
    """Load the Elasticsearch vector backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(ElasticStore, ElasticsearchVectorConfig)``, store first.
    """
    from dbgpt_ext.storage.vector_store.elastic_store import (
        ElasticsearchVectorConfig as config_cls,
        ElasticStore as store_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_builtin_knowledge_graph() -> Tuple[Type, Type]:
    """Load the builtin knowledge-graph backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(BuiltinKnowledgeGraph, BuiltinKnowledgeGraphConfig)``,
        store first.
    """
    from dbgpt_ext.storage.knowledge_graph.knowledge_graph import (
        BuiltinKnowledgeGraph as store_cls,
        BuiltinKnowledgeGraphConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_community_summary_knowledge_graph() -> Tuple[Type, Type]:
    """Load the community-summary knowledge-graph backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(CommunitySummaryKnowledgeGraph,
        CommunitySummaryKnowledgeGraphConfig)``, store first.
    """
    from dbgpt_ext.storage.knowledge_graph.community_summary import (
        CommunitySummaryKnowledgeGraph as store_cls,
        CommunitySummaryKnowledgeGraphConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_openspg() -> Tuple[Type, Type]:
    """Load the OpenSPG knowledge-graph backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(OpenSPG, OpenSPGConfig)``, store first.
    """
    from dbgpt_ext.storage.knowledge_graph.open_spg import (
        OpenSPG as store_cls,
        OpenSPGConfig as config_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def _import_full_text() -> Tuple[Type, Type]:
    """Load the Elasticsearch full-text document backend on demand.

    The import runs inside the function, so the backend module is only
    loaded when this loader is actually called.

    Returns:
        The pair ``(ElasticDocumentStore, ElasticDocumentConfig)``, store
        first.
    """
    from dbgpt_ext.storage.full_text.elasticsearch import (
        ElasticDocumentConfig as config_cls,
        ElasticDocumentStore as store_cls,
    )

    return store_cls, config_cls
|
||||
|
||||
|
||||
def __getattr__(name: str) -> Tuple[Type, Type]:
    """Resolve a storage backend name to its ``(store, config)`` pair.

    Defined at module level, so Python calls it for attribute lookups on this
    module that are not satisfied by a regular module attribute.

    Args:
        name: Attribute name being looked up, compared case-sensitively with
            ``==`` (for example ``"Chroma"`` or ``"FullText"``).

    Returns:
        Whatever the matching ``_import_*`` loader returns.

    Raises:
        AttributeError: If ``name`` matches none of the known backends.
    """
    # Loaders are stored uncalled so that only the requested backend is
    # imported; the order mirrors the lookup priority.
    loaders = (
        ("Chroma", _import_chroma),
        ("Milvus", _import_milvus),
        ("Weaviate", _import_weaviate),
        ("PGVector", _import_pgvector),
        ("OceanBase", _import_oceanbase),
        ("ElasticSearch", _import_elastic),
        ("KnowledgeGraph", _import_builtin_knowledge_graph),
        (
            "CommunitySummaryKnowledgeGraph",
            _import_community_summary_knowledge_graph,
        ),
        ("OpenSPG", _import_openspg),
        ("FullText", _import_full_text),
    )
    for backend, loader in loaders:
        if name == backend:
            return loader()
    raise AttributeError(f"Could not find: {name}")
|
||||
|
||||
|
||||
# Backend names grouped by storage kind. Every name listed here has a
# matching branch in the module-level ``__getattr__``.
__vector_store__ = [
    "Chroma",
    "Milvus",
    "Weaviate",
    "OceanBase",
    "PGVector",
    "ElasticSearch",
]

__knowledge_graph__ = [
    "KnowledgeGraph",
    "CommunitySummaryKnowledgeGraph",
    "OpenSPG",
]

__document_store__ = ["FullText"]

# The public names are the union of the three groups, in this order.
__all__ = [*__vector_store__, *__knowledge_graph__, *__document_store__]
|
||||
|
@@ -4,10 +4,6 @@ import logging
|
||||
import os
|
||||
from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
|
||||
|
||||
from chromadb import PersistentClient
|
||||
from chromadb.api.client import SharedSystemClient
|
||||
from chromadb.config import Settings
|
||||
|
||||
from dbgpt._private.pydantic import ConfigDict, Field
|
||||
from dbgpt.configs.model_config import PILOT_PATH
|
||||
from dbgpt.core import Chunk
|
||||
@@ -85,7 +81,10 @@ class ChromaStore(VectorStoreBase):
|
||||
"""
|
||||
super().__init__()
|
||||
self._vector_store_config = vector_store_config
|
||||
|
||||
try:
|
||||
from chromadb import PersistentClient, Settings
|
||||
except ImportError:
|
||||
raise ImportError("Please install chroma package first.")
|
||||
chroma_vector_config = vector_store_config.to_dict(exclude_none=True)
|
||||
chroma_path = chroma_vector_config.get(
|
||||
"persist_path", os.path.join(PILOT_PATH, "data")
|
||||
@@ -203,8 +202,11 @@ class ChromaStore(VectorStoreBase):
|
||||
|
||||
def delete_vector_name(self, vector_name: str):
|
||||
"""Delete vector name."""
|
||||
try:
|
||||
from chromadb.api.client import SharedSystemClient
|
||||
except ImportError:
|
||||
raise ImportError("Please install chroma package first.")
|
||||
logger.info(f"chroma vector_name:{vector_name} begin delete...")
|
||||
# self.vector_store_client.delete_collection()
|
||||
self._chroma_client.delete_collection(self._collection.name)
|
||||
SharedSystemClient.clear_system_cache()
|
||||
self._clean_persist_folder()
|
||||
|
Reference in New Issue
Block a user