fix: bug where VectorStore cannot be attached to EmbeddingAssemblerOperator (#2170)

This commit is contained in:
Aries-ckt 2024-12-02 20:56:23 +08:00 committed by GitHub
parent a14eeb56dd
commit 4fa60037dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 121 additions and 66 deletions

View File

@ -232,6 +232,7 @@ class PDFKnowledge(Knowledge):
"page": page, "page": page,
"type": "excel", "type": "excel",
"title": file_title, "title": file_title,
"source": self.file_path,
} }
page_documents.append( page_documents.append(
Document( Document(
@ -244,6 +245,7 @@ class PDFKnowledge(Knowledge):
"page": page, "page": page,
"type": "text", "type": "text",
"title": file_title, "title": file_title,
"source": self.file_path,
} }
page_documents.append( page_documents.append(
Document(content=inside_content, metadata=content_metadata) Document(content=inside_content, metadata=content_metadata)

View File

@ -155,7 +155,7 @@ class EmbeddingAssemblerOperator(AssemblerOperator[Knowledge, List[Chunk]]):
IOField.build_from( IOField.build_from(
_("Chunks"), _("Chunks"),
"chunks", "chunks",
Chunk, List[Chunk],
description=_( description=_(
"The assembled chunks, it has been persisted to vector " "store." "The assembled chunks, it has been persisted to vector " "store."
), ),

View File

@ -20,7 +20,7 @@ class KnowledgeOperator(MapOperator[str, Knowledge]):
"""Knowledge Factory Operator.""" """Knowledge Factory Operator."""
metadata = ViewMetadata( metadata = ViewMetadata(
label=_("Knowledge Operator"), label=_("Knowledge Loader Operator"),
name="knowledge_operator", name="knowledge_operator",
category=OperatorCategory.RAG, category=OperatorCategory.RAG,
description=_( description=_(
@ -30,7 +30,7 @@ class KnowledgeOperator(MapOperator[str, Knowledge]):
IOField.build_from( IOField.build_from(
_("knowledge datasource"), _("knowledge datasource"),
"knowledge datasource", "knowledge datasource",
str, dict,
_("knowledge datasource, which can be a document, url, or text."), _("knowledge datasource, which can be a document, url, or text."),
) )
], ],
@ -89,7 +89,7 @@ class KnowledgeOperator(MapOperator[str, Knowledge]):
self._datasource = datasource self._datasource = datasource
self._knowledge_type = KnowledgeType.get_by_value(knowledge_type) self._knowledge_type = KnowledgeType.get_by_value(knowledge_type)
async def map(self, datasource: str) -> Knowledge: async def map(self, datasource: dict) -> Knowledge:
"""Create knowledge from datasource.""" """Create knowledge from datasource."""
if self._datasource: if self._datasource:
datasource = self._datasource datasource = self._datasource
@ -120,7 +120,7 @@ class ChunksToStringOperator(MapOperator[List[Chunk], str]):
IOField.build_from( IOField.build_from(
_("Chunks"), _("Chunks"),
"chunks", "chunks",
Chunk, List[Chunk],
description=_("The input chunks."), description=_("The input chunks."),
is_list=True, is_list=True,
) )

View File

@ -8,17 +8,9 @@ from typing import Any, DefaultDict, Dict, List, Optional, Tuple, Type, cast
from dbgpt.app.component_configs import CFG from dbgpt.app.component_configs import CFG
from dbgpt.core import Chunk, Embeddings from dbgpt.core import Chunk, Embeddings
from dbgpt.core.awel.flow import (
FunctionDynamicOptions,
OptionValue,
Parameter,
ResourceCategory,
register_resource,
)
from dbgpt.rag.index.base import IndexStoreBase, IndexStoreConfig from dbgpt.rag.index.base import IndexStoreBase, IndexStoreConfig
from dbgpt.storage.vector_store.base import VectorStoreConfig from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.storage.vector_store.filters import MetadataFilters from dbgpt.storage.vector_store.filters import MetadataFilters
from dbgpt.util.i18n_utils import _
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -26,40 +18,6 @@ connector: Dict[str, Tuple[Type, Type]] = {}
pools: DefaultDict[str, Dict] = defaultdict(dict) pools: DefaultDict[str, Dict] = defaultdict(dict)
def _load_vector_options() -> List[OptionValue]:
from dbgpt.storage import vector_store
return [
OptionValue(label=cls, name=cls, value=cls)
for cls in vector_store.__all__
if issubclass(getattr(vector_store, cls)[0], IndexStoreBase)
]
@register_resource(
_("Vector Store Connector"),
"vector_store_connector",
category=ResourceCategory.VECTOR_STORE,
parameters=[
Parameter.build_from(
_("Vector Store Type"),
"vector_store_type",
str,
description=_("The type of vector store."),
options=FunctionDynamicOptions(func=_load_vector_options),
),
Parameter.build_from(
_("Vector Store Implementation"),
"vector_store_config",
VectorStoreConfig,
description=_("The vector store implementation."),
optional=True,
default=None,
),
],
# Compatible with the old version
alias=["dbgpt.storage.vector_store.connector.VectorStoreConnector"],
)
class VectorStoreConnector: class VectorStoreConnector:
"""The connector for vector store. """The connector for vector store.

View File

@ -197,7 +197,6 @@ class CommunitySummaryKnowledgeGraph(BuiltinKnowledgeGraph):
async def _aload_document_graph(self, chunks: List[Chunk]) -> None: async def _aload_document_graph(self, chunks: List[Chunk]) -> None:
"""Load the knowledge graph from the chunks. """Load the knowledge graph from the chunks.
The chunks include the doc structure. The chunks include the doc structure.
""" """
if not self._document_graph_enabled: if not self._document_graph_enabled:

View File

@ -21,10 +21,10 @@ CHROMA_COLLECTION_NAME = "langchain"
@register_resource( @register_resource(
_("Chroma Vector Store"), _("Chroma Config"),
"chroma_vector_store", "chroma_vector_config",
category=ResourceCategory.VECTOR_STORE, category=ResourceCategory.VECTOR_STORE,
description=_("Chroma vector store."), description=_("Chroma vector store config."),
parameters=[ parameters=[
*_COMMON_PARAMETERS, *_COMMON_PARAMETERS,
Parameter.build_from( Parameter.build_from(
@ -53,6 +53,22 @@ class ChromaVectorConfig(VectorStoreConfig):
) )
@register_resource(
_("Chroma Vector Store"),
"chroma_vector_store",
category=ResourceCategory.VECTOR_STORE,
description=_("Chroma vector store."),
parameters=[
Parameter.build_from(
_("Chroma Config"),
"vector_store_config",
ChromaVectorConfig,
description=_("the chroma config of vector store."),
optional=True,
default=None,
),
],
)
class ChromaStore(VectorStoreBase): class ChromaStore(VectorStoreBase):
"""Chroma vector store.""" """Chroma vector store."""

View File

@ -22,8 +22,8 @@ logger = logging.getLogger(__name__)
@register_resource( @register_resource(
_("ElasticSearch Vector Store"), _("Elastic Vector Config"),
"elasticsearch_vector_store", "elasticsearch_vector_config",
category=ResourceCategory.VECTOR_STORE, category=ResourceCategory.VECTOR_STORE,
parameters=[ parameters=[
*_COMMON_PARAMETERS, *_COMMON_PARAMETERS,
@ -72,7 +72,7 @@ logger = logging.getLogger(__name__)
default="index_name_test", default="index_name_test",
), ),
], ],
description=_("Elasticsearch vector store."), description=_("Elasticsearch vector config."),
) )
class ElasticsearchVectorConfig(VectorStoreConfig): class ElasticsearchVectorConfig(VectorStoreConfig):
"""Elasticsearch vector store config.""" """Elasticsearch vector store config."""
@ -116,6 +116,22 @@ class ElasticsearchVectorConfig(VectorStoreConfig):
) )
@register_resource(
_("Elastic Vector Store"),
"elastic_vector_store",
category=ResourceCategory.VECTOR_STORE,
description=_("Elastic vector store."),
parameters=[
Parameter.build_from(
_("Elastic Config"),
"vector_store_config",
ElasticsearchVectorConfig,
description=_("the elastic config of vector store."),
optional=True,
default=None,
),
],
)
class ElasticStore(VectorStoreBase): class ElasticStore(VectorStoreBase):
"""Elasticsearch vector store.""" """Elasticsearch vector store."""

View File

@ -22,8 +22,8 @@ logger = logging.getLogger(__name__)
@register_resource( @register_resource(
_("Milvus Vector Store"), _("Milvus Config"),
"milvus_vector_store", "milvus_vector_config",
category=ResourceCategory.VECTOR_STORE, category=ResourceCategory.VECTOR_STORE,
parameters=[ parameters=[
*_COMMON_PARAMETERS, *_COMMON_PARAMETERS,
@ -91,7 +91,7 @@ logger = logging.getLogger(__name__)
default="vector", default="vector",
), ),
], ],
description=_("Milvus vector store."), description=_("Milvus vector config."),
) )
class MilvusVectorConfig(VectorStoreConfig): class MilvusVectorConfig(VectorStoreConfig):
"""Milvus vector store config.""" """Milvus vector store config."""
@ -139,6 +139,22 @@ class MilvusVectorConfig(VectorStoreConfig):
) )
@register_resource(
_("Milvus Vector Store"),
"milvus_vector_store",
category=ResourceCategory.VECTOR_STORE,
description=_("Milvus vector store."),
parameters=[
Parameter.build_from(
_("Milvus Config"),
"vector_store_config",
MilvusVectorConfig,
description=_("the milvus config of vector store."),
optional=True,
default=None,
),
],
)
class MilvusStore(VectorStoreBase): class MilvusStore(VectorStoreBase):
"""Milvus vector store.""" """Milvus vector store."""

View File

@ -73,8 +73,8 @@ def _normalize(vector: List[float]) -> List[float]:
@register_resource( @register_resource(
_("OceanBase Vector Store"), _("OceanBase Config"),
"oceanbase_vector_store", "oceanbase_vector_config",
category=ResourceCategory.VECTOR_STORE, category=ResourceCategory.VECTOR_STORE,
parameters=[ parameters=[
*_COMMON_PARAMETERS, *_COMMON_PARAMETERS,
@ -119,7 +119,7 @@ def _normalize(vector: List[float]) -> List[float]:
default=None, default=None,
), ),
], ],
description="OceanBase vector store.", description="OceanBase vector store config.",
) )
class OceanBaseConfig(VectorStoreConfig): class OceanBaseConfig(VectorStoreConfig):
"""OceanBase vector store config.""" """OceanBase vector store config."""
@ -152,6 +152,22 @@ class OceanBaseConfig(VectorStoreConfig):
) )
@register_resource(
_("OceanBase Vector Store"),
"ob_vector_store",
category=ResourceCategory.VECTOR_STORE,
description=_("OceanBase vector store."),
parameters=[
Parameter.build_from(
_("OceanBase Config"),
"vector_store_config",
OceanBaseConfig,
description=_("the ob config of vector store."),
optional=True,
default=None,
),
],
)
class OceanBaseStore(VectorStoreBase): class OceanBaseStore(VectorStoreBase):
"""OceanBase vector store.""" """OceanBase vector store."""

View File

@ -18,8 +18,8 @@ logger = logging.getLogger(__name__)
@register_resource( @register_resource(
_("PG Vector Store"), _("PGVector Config"),
"pg_vector_store", "pg_vector_config",
category=ResourceCategory.VECTOR_STORE, category=ResourceCategory.VECTOR_STORE,
parameters=[ parameters=[
*_COMMON_PARAMETERS, *_COMMON_PARAMETERS,
@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
default=None, default=None,
), ),
], ],
description="PG vector store.", description="PG vector config.",
) )
class PGVectorConfig(VectorStoreConfig): class PGVectorConfig(VectorStoreConfig):
"""PG vector store config.""" """PG vector store config."""
@ -49,6 +49,22 @@ class PGVectorConfig(VectorStoreConfig):
) )
@register_resource(
_("PG Vector Store"),
"pg_vector_store",
category=ResourceCategory.VECTOR_STORE,
description=_("PG vector store."),
parameters=[
Parameter.build_from(
_("PG Config"),
"vector_store_config",
PGVectorConfig,
description=_("the pg config of vector store."),
optional=True,
default=None,
),
],
)
class PGVectorStore(VectorStoreBase): class PGVectorStore(VectorStoreBase):
"""PG vector store. """PG vector store.

View File

@ -15,10 +15,10 @@ logger = logging.getLogger(__name__)
@register_resource( @register_resource(
_("Weaviate Vector Store"), _("Weaviate Config"),
"weaviate_vector_store", "weaviate_vector_config",
category=ResourceCategory.VECTOR_STORE, category=ResourceCategory.VECTOR_STORE,
description=_("Weaviate vector store."), description=_("Weaviate vector config."),
parameters=[ parameters=[
*_COMMON_PARAMETERS, *_COMMON_PARAMETERS,
Parameter.build_from( Parameter.build_from(
@ -56,6 +56,22 @@ class WeaviateVectorConfig(VectorStoreConfig):
) )
@register_resource(
_("Weaviate Vector Store"),
"weaviate_vector_store",
category=ResourceCategory.VECTOR_STORE,
description=_("Weaviate vector store."),
parameters=[
Parameter.build_from(
_("Weaviate Config"),
"vector_store_config",
WeaviateVectorConfig,
description=_("the weaviate config of vector store."),
optional=True,
default=None,
),
],
)
class WeaviateStore(VectorStoreBase): class WeaviateStore(VectorStoreBase):
"""Weaviate database.""" """Weaviate database."""