fix(storage): Fix error when loading large documents

This commit is contained in:
Fangyin Cheng
2025-04-11 20:46:48 +08:00
parent 12170e2504
commit c04e3c7cb0
11 changed files with 88 additions and 17 deletions

View File

@@ -131,8 +131,8 @@ class EmbeddingAssembler(BaseAssembler):
Returns:
List[str]: List of chunk ids.
"""
max_chunks_once_load = kwargs.get("max_chunks_once_load", 10)
max_threads = kwargs.get("max_threads", 1)
max_chunks_once_load = kwargs.get("max_chunks_once_load")
max_threads = kwargs.get("max_threads")
return self._index_store.load_document_with_limit(
self._chunks, max_chunks_once_load, max_threads
)
@@ -144,8 +144,8 @@ class EmbeddingAssembler(BaseAssembler):
List[str]: List of chunk ids.
"""
# persist chunks into vector store
max_chunks_once_load = kwargs.get("max_chunks_once_load", 10)
max_threads = kwargs.get("max_threads", 1)
max_chunks_once_load = kwargs.get("max_chunks_once_load")
max_threads = kwargs.get("max_threads")
return await self._index_store.aload_document_with_limit(
self._chunks, max_chunks_once_load, max_threads
)

View File

@@ -91,6 +91,8 @@ class ChromaStore(VectorStoreBase):
embedding_fn: Optional[Embeddings] = None,
chroma_client: Optional["PersistentClient"] = None, # type: ignore # noqa
collection_metadata: Optional[dict] = None,
max_chunks_once_load: Optional[int] = None,
max_threads: Optional[int] = None,
) -> None:
"""Create a ChromaStore instance.
@@ -100,8 +102,12 @@ class ChromaStore(VectorStoreBase):
embedding_fn(Embeddings): embedding function.
chroma_client(PersistentClient): chroma client.
collection_metadata(dict): collection metadata.
max_chunks_once_load(int): max chunks once load.
max_threads(int): max threads.
"""
super().__init__()
super().__init__(
max_chunks_once_load=max_chunks_once_load, max_threads=max_threads
)
self._vector_store_config = vector_store_config
try:
from chromadb import PersistentClient, Settings

View File

@@ -157,13 +157,17 @@ class ElasticStore(VectorStoreBase):
vector_store_config: ElasticsearchStoreConfig,
name: Optional[str],
embedding_fn: Optional[Embeddings] = None,
max_chunks_once_load: Optional[int] = None,
max_threads: Optional[int] = None,
) -> None:
"""Create an ElasticsearchStore instance.
Args:
vector_store_config (ElasticsearchStoreConfig): ElasticsearchStore config.
"""
super().__init__()
super().__init__(
max_chunks_once_load=max_chunks_once_load, max_threads=max_threads
)
self._vector_store_config = vector_store_config
connect_kwargs = {}

View File

@@ -197,6 +197,8 @@ class MilvusStore(VectorStoreBase):
vector_store_config: MilvusVectorConfig,
name: Optional[str],
embedding_fn: Optional[Embeddings] = None,
max_chunks_once_load: Optional[int] = None,
max_threads: Optional[int] = None,
) -> None:
"""Create a MilvusStore instance.
@@ -204,7 +206,9 @@ class MilvusStore(VectorStoreBase):
vector_store_config (MilvusVectorConfig): MilvusStore config.
refer to https://milvus.io/docs/v2.0.x/manage_connection.md
"""
super().__init__()
super().__init__(
max_chunks_once_load=max_chunks_once_load, max_threads=max_threads
)
self._vector_store_config = vector_store_config
try:

View File

@@ -192,6 +192,8 @@ class OceanBaseStore(VectorStoreBase):
vector_store_config: OceanBaseConfig,
name: Optional[str],
embedding_fn: Optional[Embeddings] = None,
max_chunks_once_load: Optional[int] = None,
max_threads: Optional[int] = None,
) -> None:
"""Create an OceanBaseStore instance."""
try:
@@ -205,7 +207,9 @@ class OceanBaseStore(VectorStoreBase):
if vector_store_config.embedding_fn is None:
raise ValueError("embedding_fn is required for OceanBaseStore")
super().__init__()
super().__init__(
max_chunks_once_load=max_chunks_once_load, max_threads=max_threads
)
self._vector_store_config = vector_store_config
self.embedding_function = embedding_fn

View File

@@ -85,6 +85,8 @@ class PGVectorStore(VectorStoreBase):
vector_store_config: PGVectorConfig,
name: Optional[str],
embedding_fn: Optional[Embeddings] = None,
max_chunks_once_load: Optional[int] = None,
max_threads: Optional[int] = None,
) -> None:
"""Create a PGVectorStore instance."""
try:
@@ -93,7 +95,9 @@ class PGVectorStore(VectorStoreBase):
raise ImportError(
"Please install the `langchain` package to use the PGVector."
)
super().__init__()
super().__init__(
max_chunks_once_load=max_chunks_once_load, max_threads=max_threads
)
self._vector_store_config = vector_store_config
self.connection_string = vector_store_config.connection_string

View File

@@ -96,6 +96,8 @@ class WeaviateStore(VectorStoreBase):
vector_store_config: WeaviateVectorConfig,
name: Optional[str],
embedding_fn: Optional[Embeddings] = None,
max_chunks_once_load: Optional[int] = None,
max_threads: Optional[int] = None,
) -> None:
"""Initialize with Weaviate client."""
try:
@@ -105,7 +107,9 @@ class WeaviateStore(VectorStoreBase):
"Could not import weaviate python package. "
"Please install it with `pip install weaviate-client`."
)
super().__init__()
super().__init__(
max_chunks_once_load=max_chunks_once_load, max_threads=max_threads
)
self._vector_store_config = vector_store_config
self.weaviate_url = vector_store_config.weaviate_url