fix(ChatKnowledge): add aload_document (#1548)

Author: Aries-ckt
Date: 2024-05-23 11:59:34 +08:00
Committed by: GitHub
Parent: 7f55aa4b6e
Commit: 83d7e9d82d
14 changed files with 180 additions and 238 deletions


@@ -8,6 +8,7 @@ from typing import Any, Dict, List, Optional
 from dbgpt._private.pydantic import BaseModel, ConfigDict, Field, model_to_dict
 from dbgpt.core import Chunk, Embeddings
 from dbgpt.storage.vector_store.filters import MetadataFilters
+from dbgpt.util.executor_utils import blocking_func_to_async

 logger = logging.getLogger(__name__)
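The import above brings in `blocking_func_to_async`, which the later hunk uses to run the blocking loader on the store's thread pool without stalling the event loop. Its implementation is not part of this diff; as a rough, hypothetical sketch (assuming it simply wraps `loop.run_in_executor`), it behaves roughly like:

```python
import asyncio
from concurrent.futures import Executor
from functools import partial
from typing import Any, Callable


# Hypothetical sketch only: the real dbgpt.util.executor_utils.blocking_func_to_async
# may differ in signature and error handling.
async def blocking_func_to_async(
    executor: Executor, func: Callable[..., Any], *args: Any, **kwargs: Any
) -> Any:
    """Run a blocking callable in `executor` and await its result."""
    loop = asyncio.get_running_loop()
    # run_in_executor does not forward kwargs, so bind them with functools.partial.
    return await loop.run_in_executor(executor, partial(func, *args, **kwargs))
```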
@@ -46,6 +47,10 @@ class IndexStoreConfig(BaseModel):
 class IndexStoreBase(ABC):
     """Index store base class."""

+    def __init__(self, executor: Optional[ThreadPoolExecutor] = None):
+        """Init index store."""
+        self._executor = executor or ThreadPoolExecutor()
+
     @abstractmethod
     def load_document(self, chunks: List[Chunk]) -> List[str]:
         """Load document in index database.
@@ -143,6 +148,27 @@ class IndexStoreBase(ABC):
         )
         return ids

+    async def aload_document_with_limit(
+        self, chunks: List[Chunk], max_chunks_once_load: int = 10, max_threads: int = 1
+    ) -> List[str]:
+        """Load document in index database with specified limit.
+
+        Args:
+            chunks(List[Chunk]): Document chunks.
+            max_chunks_once_load(int): Max number of chunks to load at once.
+            max_threads(int): Max number of threads to use.
+
+        Return:
+            List[str]: Chunk ids.
+        """
+        return await blocking_func_to_async(
+            self._executor,
+            self.load_document_with_limit,
+            chunks,
+            max_chunks_once_load,
+            max_threads,
+        )
+
     def similar_search(
         self, text: str, topk: int, filters: Optional[MetadataFilters] = None
     ) -> List[Chunk]:
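With the async wrapper in place, callers can index chunks from coroutine code instead of blocking the event loop. A minimal usage sketch (the `store` argument stands for any concrete `IndexStoreBase` implementation; the function name here is illustrative, not part of the diff):

```python
import asyncio
from typing import List

from dbgpt.core import Chunk


async def index_chunks(store, chunks: List[Chunk]) -> List[str]:
    # Delegates to the blocking load_document_with_limit via blocking_func_to_async,
    # so the event loop stays responsive while chunks are embedded and stored.
    return await store.aload_document_with_limit(
        chunks, max_chunks_once_load=10, max_threads=1
    )


# Example (assuming `store` is an already-constructed index/vector store):
# ids = asyncio.run(index_chunks(store, [Chunk(content="hello world")]))
```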