mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-09 04:50:37 +00:00
Community: Adding bulk_size as a settable param for OpenSearchVectorSearch (#28325)
Description: When using langchain.retrievers.parent_document_retriever.py with OpenSearchVectorSearch as the vectorstore, I found that the bulk_size param I passed into the OpenSearchVectorSearch class did not take effect in my ParentDocumentRetriever.add_documents() call: it was overwritten by the default int 500 declared on the OpenSearchVectorSearch methods (e.g., add_texts(), add_embeddings()...). So I made this PR request to fix this, thanks! --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
0af5ad8262
commit
ba9b95cd23
@ -402,6 +402,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
self.client = _get_opensearch_client(opensearch_url, **kwargs)
|
self.client = _get_opensearch_client(opensearch_url, **kwargs)
|
||||||
self.async_client = _get_async_opensearch_client(opensearch_url, **kwargs)
|
self.async_client = _get_async_opensearch_client(opensearch_url, **kwargs)
|
||||||
self.engine = kwargs.get("engine", "nmslib")
|
self.engine = kwargs.get("engine", "nmslib")
|
||||||
|
self.bulk_size = kwargs.get("bulk_size", 500)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def embeddings(self) -> Embeddings:
|
def embeddings(self) -> Embeddings:
|
||||||
@ -413,9 +414,10 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
embeddings: List[List[float]],
|
embeddings: List[List[float]],
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else self.bulk_size
|
||||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||||
index_name = kwargs.get("index_name", self.index_name)
|
index_name = kwargs.get("index_name", self.index_name)
|
||||||
text_field = kwargs.get("text_field", "text")
|
text_field = kwargs.get("text_field", "text")
|
||||||
@ -454,9 +456,10 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
embeddings: List[List[float]],
|
embeddings: List[List[float]],
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else self.bulk_size
|
||||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||||
index_name = kwargs.get("index_name", self.index_name)
|
index_name = kwargs.get("index_name", self.index_name)
|
||||||
text_field = kwargs.get("text_field", "text")
|
text_field = kwargs.get("text_field", "text")
|
||||||
@ -560,7 +563,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
texts: Iterable[str],
|
texts: Iterable[str],
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Run more texts through the embeddings and add to the vectorstore.
|
"""Run more texts through the embeddings and add to the vectorstore.
|
||||||
@ -582,6 +585,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
to "text".
|
to "text".
|
||||||
"""
|
"""
|
||||||
embeddings = self.embedding_function.embed_documents(list(texts))
|
embeddings = self.embedding_function.embed_documents(list(texts))
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else self.bulk_size
|
||||||
return self.__add(
|
return self.__add(
|
||||||
texts,
|
texts,
|
||||||
embeddings,
|
embeddings,
|
||||||
@ -596,7 +600,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
texts: Iterable[str],
|
texts: Iterable[str],
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""
|
"""
|
||||||
@ -604,6 +608,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
and add to the vectorstore.
|
and add to the vectorstore.
|
||||||
"""
|
"""
|
||||||
embeddings = await self.embedding_function.aembed_documents(list(texts))
|
embeddings = await self.embedding_function.aembed_documents(list(texts))
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else self.bulk_size
|
||||||
return await self.__aadd(
|
return await self.__aadd(
|
||||||
texts,
|
texts,
|
||||||
embeddings,
|
embeddings,
|
||||||
@ -618,7 +623,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
text_embeddings: Iterable[Tuple[str, List[float]]],
|
text_embeddings: Iterable[Tuple[str, List[float]]],
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Add the given texts and embeddings to the vectorstore.
|
"""Add the given texts and embeddings to the vectorstore.
|
||||||
@ -641,6 +646,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
to "text".
|
to "text".
|
||||||
"""
|
"""
|
||||||
texts, embeddings = zip(*text_embeddings)
|
texts, embeddings = zip(*text_embeddings)
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else self.bulk_size
|
||||||
return self.__add(
|
return self.__add(
|
||||||
list(texts),
|
list(texts),
|
||||||
list(embeddings),
|
list(embeddings),
|
||||||
@ -1085,7 +1091,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
texts: List[str],
|
texts: List[str],
|
||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> OpenSearchVectorSearch:
|
) -> OpenSearchVectorSearch:
|
||||||
@ -1134,6 +1140,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
embeddings = embedding.embed_documents(texts)
|
embeddings = embedding.embed_documents(texts)
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else cls.bulk_size
|
||||||
return cls.from_embeddings(
|
return cls.from_embeddings(
|
||||||
embeddings,
|
embeddings,
|
||||||
texts,
|
texts,
|
||||||
@ -1150,7 +1157,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
texts: List[str],
|
texts: List[str],
|
||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> OpenSearchVectorSearch:
|
) -> OpenSearchVectorSearch:
|
||||||
@ -1199,6 +1206,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
embeddings = await embedding.aembed_documents(texts)
|
embeddings = await embedding.aembed_documents(texts)
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else cls.bulk_size
|
||||||
return await cls.afrom_embeddings(
|
return await cls.afrom_embeddings(
|
||||||
embeddings,
|
embeddings,
|
||||||
texts,
|
texts,
|
||||||
@ -1216,7 +1224,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
texts: List[str],
|
texts: List[str],
|
||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> OpenSearchVectorSearch:
|
) -> OpenSearchVectorSearch:
|
||||||
@ -1285,6 +1293,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
"max_chunk_bytes",
|
"max_chunk_bytes",
|
||||||
"is_aoss",
|
"is_aoss",
|
||||||
]
|
]
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else cls.bulk_size
|
||||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||||
dim = len(embeddings[0])
|
dim = len(embeddings[0])
|
||||||
# Get the index name from either from kwargs or ENV Variable
|
# Get the index name from either from kwargs or ENV Variable
|
||||||
@ -1346,7 +1355,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
texts: List[str],
|
texts: List[str],
|
||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
bulk_size: int = 500,
|
bulk_size: Optional[int] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> OpenSearchVectorSearch:
|
) -> OpenSearchVectorSearch:
|
||||||
@ -1417,6 +1426,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
"max_chunk_bytes",
|
"max_chunk_bytes",
|
||||||
"is_aoss",
|
"is_aoss",
|
||||||
]
|
]
|
||||||
|
bulk_size = bulk_size if bulk_size is not None else cls.bulk_size
|
||||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||||
dim = len(embeddings[0])
|
dim = len(embeddings[0])
|
||||||
# Get the index name from either from kwargs or ENV Variable
|
# Get the index name from either from kwargs or ENV Variable
|
||||||
|
Loading…
Reference in New Issue
Block a user