refactor:adapt rag storage and add integration documents. (#2361)

This commit is contained in:
Aries-ckt
2025-02-24 12:49:36 +08:00
committed by GitHub
parent 94b51284e0
commit 22598ca79f
27 changed files with 647 additions and 343 deletions

View File

@@ -70,6 +70,8 @@ datasource_duckdb = [
storage_milvus = ["pymilvus"]
storage_weaviate = ["weaviate-client"]
storage_chromadb = ["chromadb>=0.4.22"]
storage_elasticsearch = ["elasticsearch"]
storage_obvector = ["pyobvector"]
[tool.uv]
managed = true

View File

@@ -1 +1,131 @@
"""Module of storage."""
"""Module of RAG storage."""
from typing import Tuple, Type
def _import_pgvector() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.pgvector_store import (
PGVectorConfig,
PGVectorStore,
)
return PGVectorStore, PGVectorConfig
def _import_milvus() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.milvus_store import (
MilvusStore,
MilvusVectorConfig,
)
return MilvusStore, MilvusVectorConfig
def _import_chroma() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.chroma_store import (
ChromaStore,
ChromaVectorConfig,
)
return ChromaStore, ChromaVectorConfig
def _import_weaviate() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.weaviate_store import (
WeaviateStore,
WeaviateVectorConfig,
)
return WeaviateStore, WeaviateVectorConfig
def _import_oceanbase() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.oceanbase_store import (
OceanBaseConfig,
OceanBaseStore,
)
return OceanBaseStore, OceanBaseConfig
def _import_elastic() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.elastic_store import (
ElasticsearchVectorConfig,
ElasticStore,
)
return ElasticStore, ElasticsearchVectorConfig
def _import_builtin_knowledge_graph() -> Tuple[Type, Type]:
from dbgpt_ext.storage.knowledge_graph.knowledge_graph import (
BuiltinKnowledgeGraph,
BuiltinKnowledgeGraphConfig,
)
return BuiltinKnowledgeGraph, BuiltinKnowledgeGraphConfig
def _import_community_summary_knowledge_graph() -> Tuple[Type, Type]:
from dbgpt_ext.storage.knowledge_graph.community_summary import (
CommunitySummaryKnowledgeGraph,
CommunitySummaryKnowledgeGraphConfig,
)
return CommunitySummaryKnowledgeGraph, CommunitySummaryKnowledgeGraphConfig
def _import_openspg() -> Tuple[Type, Type]:
from dbgpt_ext.storage.knowledge_graph.open_spg import OpenSPG, OpenSPGConfig
return OpenSPG, OpenSPGConfig
def _import_full_text() -> Tuple[Type, Type]:
from dbgpt_ext.storage.full_text.elasticsearch import (
ElasticDocumentConfig,
ElasticDocumentStore,
)
return ElasticDocumentStore, ElasticDocumentConfig
def _select_rag_storage(name: str) -> Tuple[Type, Type]:
if name == "Chroma":
return _import_chroma()
elif name == "Milvus":
return _import_milvus()
elif name == "Weaviate":
return _import_weaviate()
elif name == "PGVector":
return _import_pgvector()
elif name == "OceanBase":
return _import_oceanbase()
elif name == "ElasticSearch":
return _import_elastic()
elif name == "KnowledgeGraph":
return _import_builtin_knowledge_graph()
elif name == "CommunitySummaryKnowledgeGraph":
return _import_community_summary_knowledge_graph()
elif name == "OpenSPG":
return _import_openspg()
elif name == "FullText":
return _import_full_text()
else:
raise AttributeError(f"Could not find: {name}")
__vector_store__ = [
"Chroma",
"Milvus",
"Weaviate",
"OceanBase",
"PGVector",
"ElasticSearch",
]
__knowledge_graph__ = ["KnowledgeGraph", "CommunitySummaryKnowledgeGraph", "OpenSPG"]
__document_store__ = ["FullText"]
__all__ = __vector_store__ + __knowledge_graph__ + __document_store__

View File

@@ -1,7 +1,7 @@
"""Graph store factory."""
import logging
from typing import Tuple, Type
from typing import Optional, Tuple, Type
from dbgpt.storage.graph_store.base import GraphStoreBase, GraphStoreConfig
from dbgpt_ext.storage import graph_store
@@ -13,7 +13,11 @@ class GraphStoreFactory:
"""Factory for graph store."""
@staticmethod
def create(graph_store_type: str, graph_store_configure=None) -> GraphStoreBase:
def create(
graph_store_type: str,
graph_store_configure=None,
graph_config: Optional[dict] = None,
) -> GraphStoreBase:
"""Create a GraphStore instance.
Args:
@@ -23,7 +27,10 @@ class GraphStoreFactory:
store_cls, cfg_cls = GraphStoreFactory.__find_type(graph_store_type)
try:
config = cfg_cls()
if graph_config:
config = cfg_cls(**graph_config)
else:
config = cfg_cls()
if graph_store_configure:
graph_store_configure(config)
return store_cls(config)

View File

@@ -83,23 +83,18 @@ class TuGraphStore(GraphStoreBase):
def __init__(self, config: TuGraphStoreConfig) -> None:
"""Initialize the TuGraphStore with connection details."""
self._config = config
self._host = os.getenv("TUGRAPH_HOST", config.host)
self._port = int(os.getenv("TUGRAPH_PORT", config.port))
self._username = os.getenv("TUGRAPH_USERNAME", config.username)
self._password = os.getenv("TUGRAPH_PASSWORD", config.password)
self.enable_summary = (
self._host = config.host or os.getenv("TUGRAPH_HOST")
self._port = int(config.port or os.getenv("TUGRAPH_PORT"))
self._username = config.username or os.getenv("TUGRAPH_USERNAME")
self._password = config.password or os.getenv("TUGRAPH_PASSWORD")
self.enable_summary = config.enable_summary or (
os.getenv("GRAPH_COMMUNITY_SUMMARY_ENABLED", "").lower() == "true"
if "GRAPH_COMMUNITY_SUMMARY_ENABLED" in os.environ
else config.enable_summary
)
self.enable_similarity_search = (
os.environ["SIMILARITY_SEARCH_ENABLED"].lower() == "true"
if "SIMILARITY_SEARCH_ENABLED" in os.environ
else config.enable_similarity_search
self.enable_similarity_search = config.enable_similarity_search or (
os.getenv("SIMILARITY_SEARCH_ENABLED", "").lower() == "true"
)
self._plugin_names = (
self._plugin_names = config.plugin_names or (
os.getenv("TUGRAPH_PLUGIN_NAMES", "leiden").split(",")
or config.plugin_names
)
self._graph_name = config.name

View File

@@ -107,9 +107,7 @@ class BuiltinKnowledgeGraphConfig(KnowledgeGraphConfig):
model_name: str = Field(default=None, description="The name of llm model.")
graph_store_type: str = Field(
default="TuGraph", description="The type of graph store."
)
type: str = Field(default="TuGraph", description="The type of graph store.")
@register_resource(
@@ -151,8 +149,8 @@ class BuiltinKnowledgeGraph(KnowledgeGraphBase):
cfg.name = config.name
cfg.embedding_fn = config.embedding_fn
graph_store_type = os.getenv("GRAPH_STORE_TYPE") or config.graph_store_type
return GraphStoreFactory.create(graph_store_type, configure)
graph_store_type = os.getenv("GRAPH_STORE_TYPE") or config.type
return GraphStoreFactory.create(graph_store_type, configure, config.dict())
def __init_graph_store_adapter(self):
return GraphStoreAdapterFactory.create(self._graph_store)

View File

@@ -1,131 +0,0 @@
"""Vector Store Module."""
from typing import Tuple, Type
def _import_pgvector() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.pgvector_store import (
PGVectorConfig,
PGVectorStore,
)
return PGVectorStore, PGVectorConfig
def _import_milvus() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.milvus_store import (
MilvusStore,
MilvusVectorConfig,
)
return MilvusStore, MilvusVectorConfig
def _import_chroma() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.chroma_store import (
ChromaStore,
ChromaVectorConfig,
)
return ChromaStore, ChromaVectorConfig
def _import_weaviate() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.weaviate_store import (
WeaviateStore,
WeaviateVectorConfig,
)
return WeaviateStore, WeaviateVectorConfig
def _import_oceanbase() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.oceanbase_store import (
OceanBaseConfig,
OceanBaseStore,
)
return OceanBaseStore, OceanBaseConfig
def _import_elastic() -> Tuple[Type, Type]:
from dbgpt_ext.storage.vector_store.elastic_store import (
ElasticsearchVectorConfig,
ElasticStore,
)
return ElasticStore, ElasticsearchVectorConfig
def _import_builtin_knowledge_graph() -> Tuple[Type, Type]:
from dbgpt_ext.storage.knowledge_graph.knowledge_graph import (
BuiltinKnowledgeGraph,
BuiltinKnowledgeGraphConfig,
)
return BuiltinKnowledgeGraph, BuiltinKnowledgeGraphConfig
def _import_community_summary_knowledge_graph() -> Tuple[Type, Type]:
from dbgpt_ext.storage.knowledge_graph.community_summary import (
CommunitySummaryKnowledgeGraph,
CommunitySummaryKnowledgeGraphConfig,
)
return CommunitySummaryKnowledgeGraph, CommunitySummaryKnowledgeGraphConfig
def _import_openspg() -> Tuple[Type, Type]:
from dbgpt_ext.storage.knowledge_graph.open_spg import OpenSPG, OpenSPGConfig
return OpenSPG, OpenSPGConfig
def _import_full_text() -> Tuple[Type, Type]:
from dbgpt_ext.storage.full_text.elasticsearch import (
ElasticDocumentConfig,
ElasticDocumentStore,
)
return ElasticDocumentStore, ElasticDocumentConfig
def __getattr__(name: str) -> Tuple[Type, Type]:
if name == "Chroma":
return _import_chroma()
elif name == "Milvus":
return _import_milvus()
elif name == "Weaviate":
return _import_weaviate()
elif name == "PGVector":
return _import_pgvector()
elif name == "OceanBase":
return _import_oceanbase()
elif name == "ElasticSearch":
return _import_elastic()
elif name == "KnowledgeGraph":
return _import_builtin_knowledge_graph()
elif name == "CommunitySummaryKnowledgeGraph":
return _import_community_summary_knowledge_graph()
elif name == "OpenSPG":
return _import_openspg()
elif name == "FullText":
return _import_full_text()
else:
raise AttributeError(f"Could not find: {name}")
__vector_store__ = [
"Chroma",
"Milvus",
"Weaviate",
"OceanBase",
"PGVector",
"ElasticSearch",
]
__knowledge_graph__ = ["KnowledgeGraph", "CommunitySummaryKnowledgeGraph", "OpenSPG"]
__document_store__ = ["FullText"]
__all__ = __vector_store__ + __knowledge_graph__ + __document_store__

View File

@@ -4,10 +4,6 @@ import logging
import os
from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
from chromadb import PersistentClient
from chromadb.api.client import SharedSystemClient
from chromadb.config import Settings
from dbgpt._private.pydantic import ConfigDict, Field
from dbgpt.configs.model_config import PILOT_PATH
from dbgpt.core import Chunk
@@ -85,7 +81,10 @@ class ChromaStore(VectorStoreBase):
"""
super().__init__()
self._vector_store_config = vector_store_config
try:
from chromadb import PersistentClient, Settings
except ImportError:
raise ImportError("Please install chroma package first.")
chroma_vector_config = vector_store_config.to_dict(exclude_none=True)
chroma_path = chroma_vector_config.get(
"persist_path", os.path.join(PILOT_PATH, "data")
@@ -203,8 +202,11 @@ class ChromaStore(VectorStoreBase):
def delete_vector_name(self, vector_name: str):
"""Delete vector name."""
try:
from chromadb.api.client import SharedSystemClient
except ImportError:
raise ImportError("Please install chroma package first.")
logger.info(f"chroma vector_name:{vector_name} begin delete...")
# self.vector_store_client.delete_collection()
self._chroma_client.delete_collection(self._collection.name)
SharedSystemClient.clear_system_cache()
self._clean_persist_folder()