mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-12 20:53:48 +00:00
fix(ChatKnowledge): add aload_document (#1548)
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
"""Embedding Assembler."""
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from dbgpt.core import Chunk, Embeddings
|
||||
from dbgpt.storage.vector_store.connector import VectorStoreConnector
|
||||
|
||||
from ...util.executor_utils import blocking_func_to_async
|
||||
from ..assembler.base import BaseAssembler
|
||||
from ..chunk_manager import ChunkParameters
|
||||
from ..embedding.embedding_factory import DefaultEmbeddingFactory
|
||||
@@ -98,6 +100,41 @@ class EmbeddingAssembler(BaseAssembler):
|
||||
embeddings=embeddings,
|
||||
)
|
||||
|
||||
@classmethod
async def aload_from_knowledge(
    cls,
    knowledge: Knowledge,
    vector_store_connector: VectorStoreConnector,
    chunk_parameters: Optional[ChunkParameters] = None,
    embedding_model: Optional[str] = None,
    embeddings: Optional[Embeddings] = None,
    executor: Optional[ThreadPoolExecutor] = None,
) -> "EmbeddingAssembler":
    """Asynchronously build an assembler from a knowledge datasource.

    The constructor call ``cls(knowledge, vector_store_connector,
    chunk_parameters, embedding_model, embeddings)`` is handed to
    ``blocking_func_to_async`` together with the executor, and the result is
    awaited.

    Args:
        knowledge: (Knowledge) Knowledge datasource.
        vector_store_connector: (VectorStoreConnector) VectorStoreConnector to use.
        chunk_parameters: (Optional[ChunkParameters]) ChunkManager to use for
            chunking.
        embedding_model: (Optional[str]) Embedding model to use.
        embeddings: (Optional[Embeddings]) Embeddings to use.
        executor: (Optional[ThreadPoolExecutor]) ThreadPoolExecutor to use. When
            omitted, a temporary pool is created for this call and shut down
            once the call finishes.

    Returns:
        EmbeddingAssembler
    """
    # Remember whether the pool is ours: only a pool created here may be shut
    # down; a caller-supplied executor is left untouched.
    owns_executor = not executor
    pool = ThreadPoolExecutor() if owns_executor else executor
    try:
        return await blocking_func_to_async(
            pool,
            cls,
            knowledge,
            vector_store_connector,
            chunk_parameters,
            embedding_model,
            embeddings,
        )
    finally:
        if owns_executor:
            # wait=False keeps the event loop from blocking on thread joins.
            pool.shutdown(wait=False)
|
||||
|
||||
def persist(self) -> List[str]:
|
||||
"""Persist chunks into vector store.
|
||||
|
||||
|
@@ -8,6 +8,7 @@ from typing import Any, Dict, List, Optional
|
||||
from dbgpt._private.pydantic import BaseModel, ConfigDict, Field, model_to_dict
|
||||
from dbgpt.core import Chunk, Embeddings
|
||||
from dbgpt.storage.vector_store.filters import MetadataFilters
|
||||
from dbgpt.util.executor_utils import blocking_func_to_async
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -46,6 +47,10 @@ class IndexStoreConfig(BaseModel):
|
||||
class IndexStoreBase(ABC):
|
||||
"""Index store base class."""
|
||||
|
||||
def __init__(self, executor: Optional[ThreadPoolExecutor] = None):
    """Set up the index store and the executor it keeps.

    Args:
        executor(Optional[ThreadPoolExecutor]): Executor to store on the
            instance; a new ThreadPoolExecutor is created when none is given.
    """
    self._executor = executor if executor else ThreadPoolExecutor()
|
||||
|
||||
@abstractmethod
|
||||
def load_document(self, chunks: List[Chunk]) -> List[str]:
|
||||
"""Load document in index database.
|
||||
@@ -143,6 +148,27 @@ class IndexStoreBase(ABC):
|
||||
)
|
||||
return ids
|
||||
|
||||
async def aload_document_with_limit(
    self, chunks: List[Chunk], max_chunks_once_load: int = 10, max_threads: int = 1
) -> List[str]:
    """Async counterpart of ``load_document_with_limit``.

    Hands ``self.load_document_with_limit`` and its arguments to
    ``blocking_func_to_async`` along with ``self._executor`` and awaits the
    outcome.

    Args:
        chunks(List[Chunk]): Document chunks.
        max_chunks_once_load(int): Max number of chunks to load at once.
        max_threads(int): Max number of threads to use.

    Return:
        List[str]: Chunk ids.
    """
    # Forwarded positionally, in the same order as this method's parameters.
    load_args = (chunks, max_chunks_once_load, max_threads)
    chunk_ids = await blocking_func_to_async(
        self._executor, self.load_document_with_limit, *load_args
    )
    return chunk_ids
|
||||
|
||||
def similar_search(
|
||||
self, text: str, topk: int, filters: Optional[MetadataFilters] = None
|
||||
) -> List[Chunk]:
|
||||
|
Reference in New Issue
Block a user