mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-14 05:31:40 +00:00
feat(ChatKnowledge): ChatKnowledge Support Keyword Retrieve (#1624)
Co-authored-by: Fangyin Cheng <staneyffer@gmail.com>
This commit is contained in:
@@ -5,10 +5,10 @@ from typing import List, Optional
|
||||
from dbgpt.core import Chunk
|
||||
from dbgpt.core.interface.operators.retriever import RetrieverOperator
|
||||
from dbgpt.datasource.base import BaseConnector
|
||||
from dbgpt.storage.vector_store.connector import VectorStoreConnector
|
||||
|
||||
from ..assembler.db_schema import DBSchemaAssembler
|
||||
from ..chunk_manager import ChunkParameters
|
||||
from ..index.base import IndexStoreBase
|
||||
from ..retriever.db_schema import DBSchemaRetriever
|
||||
from .assembler import AssemblerOperator
|
||||
|
||||
@@ -19,13 +19,13 @@ class DBSchemaRetrieverOperator(RetrieverOperator[str, List[Chunk]]):
|
||||
Args:
|
||||
connector (BaseConnector): The connection.
|
||||
top_k (int, optional): The top k. Defaults to 4.
|
||||
vector_store_connector (VectorStoreConnector, optional): The vector store
|
||||
index_store (IndexStoreBase, optional): The vector store
|
||||
connector. Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
vector_store_connector: VectorStoreConnector,
|
||||
index_store: IndexStoreBase,
|
||||
top_k: int = 4,
|
||||
connector: Optional[BaseConnector] = None,
|
||||
**kwargs
|
||||
@@ -35,7 +35,7 @@ class DBSchemaRetrieverOperator(RetrieverOperator[str, List[Chunk]]):
|
||||
self._retriever = DBSchemaRetriever(
|
||||
top_k=top_k,
|
||||
connector=connector,
|
||||
vector_store_connector=vector_store_connector,
|
||||
index_store=index_store,
|
||||
)
|
||||
|
||||
def retrieve(self, query: str) -> List[Chunk]:
|
||||
@@ -53,7 +53,7 @@ class DBSchemaAssemblerOperator(AssemblerOperator[BaseConnector, List[Chunk]]):
|
||||
def __init__(
|
||||
self,
|
||||
connector: BaseConnector,
|
||||
vector_store_connector: VectorStoreConnector,
|
||||
index_store: IndexStoreBase,
|
||||
chunk_parameters: Optional[ChunkParameters] = None,
|
||||
**kwargs
|
||||
):
|
||||
@@ -61,14 +61,14 @@ class DBSchemaAssemblerOperator(AssemblerOperator[BaseConnector, List[Chunk]]):
|
||||
|
||||
Args:
|
||||
connector (BaseConnector): The connection.
|
||||
vector_store_connector (VectorStoreConnector): The vector store connector.
|
||||
index_store (IndexStoreBase): The Storage IndexStoreBase.
|
||||
chunk_parameters (Optional[ChunkParameters], optional): The chunk
|
||||
parameters.
|
||||
"""
|
||||
if not chunk_parameters:
|
||||
chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")
|
||||
self._chunk_parameters = chunk_parameters
|
||||
self._vector_store_connector = vector_store_connector
|
||||
self._index_store = index_store
|
||||
self._connector = connector
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@@ -84,7 +84,7 @@ class DBSchemaAssemblerOperator(AssemblerOperator[BaseConnector, List[Chunk]]):
|
||||
assembler = DBSchemaAssembler.load_from_connection(
|
||||
connector=self._connector,
|
||||
chunk_parameters=self._chunk_parameters,
|
||||
vector_store_connector=self._vector_store_connector,
|
||||
index_store=self._index_store,
|
||||
)
|
||||
assembler.persist()
|
||||
return assembler.get_chunks()
|
||||
|
@@ -6,11 +6,11 @@ from typing import List, Optional, Union
|
||||
from dbgpt.core import Chunk
|
||||
from dbgpt.core.awel.flow import IOField, OperatorCategory, Parameter, ViewMetadata
|
||||
from dbgpt.core.interface.operators.retriever import RetrieverOperator
|
||||
from dbgpt.storage.vector_store.connector import VectorStoreConnector
|
||||
from dbgpt.util.i18n_utils import _
|
||||
|
||||
from ..assembler.embedding import EmbeddingAssembler
|
||||
from ..chunk_manager import ChunkParameters
|
||||
from ..index.base import IndexStoreBase
|
||||
from ..knowledge import Knowledge
|
||||
from ..retriever.embedding import EmbeddingRetriever
|
||||
from ..retriever.rerank import Ranker
|
||||
@@ -28,9 +28,9 @@ class EmbeddingRetrieverOperator(RetrieverOperator[Union[str, List[str]], List[C
|
||||
category=OperatorCategory.RAG,
|
||||
parameters=[
|
||||
Parameter.build_from(
|
||||
_("Vector Store Connector"),
|
||||
_("Storage Index Store"),
|
||||
"vector_store_connector",
|
||||
VectorStoreConnector,
|
||||
IndexStoreBase,
|
||||
description=_("The vector store connector."),
|
||||
),
|
||||
Parameter.build_from(
|
||||
@@ -88,7 +88,7 @@ class EmbeddingRetrieverOperator(RetrieverOperator[Union[str, List[str]], List[C
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
vector_store_connector: VectorStoreConnector,
|
||||
index_store: IndexStoreBase,
|
||||
top_k: int,
|
||||
score_threshold: float = 0.3,
|
||||
query_rewrite: Optional[QueryRewrite] = None,
|
||||
@@ -99,7 +99,7 @@ class EmbeddingRetrieverOperator(RetrieverOperator[Union[str, List[str]], List[C
|
||||
super().__init__(**kwargs)
|
||||
self._score_threshold = score_threshold
|
||||
self._retriever = EmbeddingRetriever(
|
||||
vector_store_connector=vector_store_connector,
|
||||
index_store=index_store,
|
||||
top_k=top_k,
|
||||
query_rewrite=query_rewrite,
|
||||
rerank=rerank,
|
||||
@@ -129,7 +129,7 @@ class EmbeddingAssemblerOperator(AssemblerOperator[Knowledge, List[Chunk]]):
|
||||
Parameter.build_from(
|
||||
_("Vector Store Connector"),
|
||||
"vector_store_connector",
|
||||
VectorStoreConnector,
|
||||
IndexStoreBase,
|
||||
description=_("The vector store connector."),
|
||||
),
|
||||
Parameter.build_from(
|
||||
@@ -164,21 +164,21 @@ class EmbeddingAssemblerOperator(AssemblerOperator[Knowledge, List[Chunk]]):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
vector_store_connector: VectorStoreConnector,
|
||||
index_store: IndexStoreBase,
|
||||
chunk_parameters: Optional[ChunkParameters] = None,
|
||||
**kwargs
|
||||
):
|
||||
"""Create a new EmbeddingAssemblerOperator.
|
||||
|
||||
Args:
|
||||
vector_store_connector (VectorStoreConnector): The vector store connector.
|
||||
index_store (IndexStoreBase): The index storage.
|
||||
chunk_parameters (Optional[ChunkParameters], optional): The chunk
|
||||
parameters. Defaults to ChunkParameters(chunk_strategy="CHUNK_BY_SIZE").
|
||||
"""
|
||||
if not chunk_parameters:
|
||||
chunk_parameters = ChunkParameters(chunk_strategy="CHUNK_BY_SIZE")
|
||||
self._chunk_parameters = chunk_parameters
|
||||
self._vector_store_connector = vector_store_connector
|
||||
self._index_store = index_store
|
||||
super().__init__(**kwargs)
|
||||
|
||||
def assemble(self, knowledge: Knowledge) -> List[Chunk]:
|
||||
@@ -186,7 +186,7 @@ class EmbeddingAssemblerOperator(AssemblerOperator[Knowledge, List[Chunk]]):
|
||||
assembler = EmbeddingAssembler.load_from_knowledge(
|
||||
knowledge=knowledge,
|
||||
chunk_parameters=self._chunk_parameters,
|
||||
vector_store_connector=self._vector_store_connector,
|
||||
index_store=self._index_store,
|
||||
)
|
||||
assembler.persist()
|
||||
return assembler.get_chunks()
|
||||
|
@@ -8,8 +8,8 @@ from typing import Any, Optional
|
||||
from dbgpt.core import LLMClient
|
||||
from dbgpt.core.awel import MapOperator
|
||||
from dbgpt.datasource.base import BaseConnector
|
||||
from dbgpt.rag.index.base import IndexStoreBase
|
||||
from dbgpt.rag.schemalinker.schema_linking import SchemaLinking
|
||||
from dbgpt.storage.vector_store.connector import VectorStoreConnector
|
||||
|
||||
|
||||
class SchemaLinkingOperator(MapOperator[Any, Any]):
|
||||
@@ -21,7 +21,7 @@ class SchemaLinkingOperator(MapOperator[Any, Any]):
|
||||
model_name: str,
|
||||
llm: LLMClient,
|
||||
top_k: int = 5,
|
||||
vector_store_connector: Optional[VectorStoreConnector] = None,
|
||||
index_store: Optional[IndexStoreBase] = None,
|
||||
**kwargs
|
||||
):
|
||||
"""Create the schema linking operator.
|
||||
@@ -37,7 +37,7 @@ class SchemaLinkingOperator(MapOperator[Any, Any]):
|
||||
connector=connector,
|
||||
llm=llm,
|
||||
model_name=model_name,
|
||||
vector_store_connector=vector_store_connector,
|
||||
index_store=index_store,
|
||||
)
|
||||
|
||||
async def map(self, query: str) -> str:
|
||||
|
Reference in New Issue
Block a user