mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-14 07:07:34 +00:00
Support max_chunk_bytes in OpenSearchVectorSearch to pass down to bulk (#6855)
Support a `max_chunk_bytes` kwarg that is passed down to the `bulk` helper, in order to respect the request size limits in OpenSearch, both locally and in AWS. @rlancemartin, @eyurtsev
This commit is contained in:
parent
5861770a53
commit
76d03f398d
@ -81,6 +81,7 @@ def _bulk_ingest_embeddings(
|
|||||||
vector_field: str = "vector_field",
|
vector_field: str = "vector_field",
|
||||||
text_field: str = "text",
|
text_field: str = "text",
|
||||||
mapping: Optional[Dict] = None,
|
mapping: Optional[Dict] = None,
|
||||||
|
max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""Bulk Ingest Embeddings into given index."""
|
"""Bulk Ingest Embeddings into given index."""
|
||||||
if not mapping:
|
if not mapping:
|
||||||
@ -110,7 +111,7 @@ def _bulk_ingest_embeddings(
|
|||||||
}
|
}
|
||||||
requests.append(request)
|
requests.append(request)
|
||||||
return_ids.append(_id)
|
return_ids.append(_id)
|
||||||
bulk(client, requests)
|
bulk(client, requests, max_chunk_bytes=max_chunk_bytes)
|
||||||
client.indices.refresh(index=index_name)
|
client.indices.refresh(index=index_name)
|
||||||
return return_ids
|
return return_ids
|
||||||
|
|
||||||
@ -351,6 +352,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
ef_construction = _get_kwargs_value(kwargs, "ef_construction", 512)
|
ef_construction = _get_kwargs_value(kwargs, "ef_construction", 512)
|
||||||
m = _get_kwargs_value(kwargs, "m", 16)
|
m = _get_kwargs_value(kwargs, "m", 16)
|
||||||
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")
|
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")
|
||||||
|
max_chunk_bytes = _get_kwargs_value(kwargs, "max_chunk_bytes", 1 * 1024 * 1024)
|
||||||
|
|
||||||
mapping = _default_text_mapping(
|
mapping = _default_text_mapping(
|
||||||
dim, engine, space_type, ef_search, ef_construction, m, vector_field
|
dim, engine, space_type, ef_search, ef_construction, m, vector_field
|
||||||
@ -366,6 +368,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
vector_field=vector_field,
|
vector_field=vector_field,
|
||||||
text_field=text_field,
|
text_field=text_field,
|
||||||
mapping=mapping,
|
mapping=mapping,
|
||||||
|
max_chunk_bytes=max_chunk_bytes,
|
||||||
)
|
)
|
||||||
|
|
||||||
def similarity_search(
|
def similarity_search(
|
||||||
@ -651,6 +654,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
"ef_search",
|
"ef_search",
|
||||||
"ef_construction",
|
"ef_construction",
|
||||||
"m",
|
"m",
|
||||||
|
"max_chunk_bytes",
|
||||||
]
|
]
|
||||||
embeddings = embedding.embed_documents(texts)
|
embeddings = embedding.embed_documents(texts)
|
||||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||||
@ -663,6 +667,7 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
is_appx_search = _get_kwargs_value(kwargs, "is_appx_search", True)
|
is_appx_search = _get_kwargs_value(kwargs, "is_appx_search", True)
|
||||||
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")
|
vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field")
|
||||||
text_field = _get_kwargs_value(kwargs, "text_field", "text")
|
text_field = _get_kwargs_value(kwargs, "text_field", "text")
|
||||||
|
max_chunk_bytes = _get_kwargs_value(kwargs, "max_chunk_bytes", 1 * 1024 * 1024)
|
||||||
if is_appx_search:
|
if is_appx_search:
|
||||||
engine = _get_kwargs_value(kwargs, "engine", "nmslib")
|
engine = _get_kwargs_value(kwargs, "engine", "nmslib")
|
||||||
space_type = _get_kwargs_value(kwargs, "space_type", "l2")
|
space_type = _get_kwargs_value(kwargs, "space_type", "l2")
|
||||||
@ -687,5 +692,6 @@ class OpenSearchVectorSearch(VectorStore):
|
|||||||
vector_field=vector_field,
|
vector_field=vector_field,
|
||||||
text_field=text_field,
|
text_field=text_field,
|
||||||
mapping=mapping,
|
mapping=mapping,
|
||||||
|
max_chunk_bytes=max_chunk_bytes,
|
||||||
)
|
)
|
||||||
return cls(opensearch_url, index_name, embedding, **kwargs)
|
return cls(opensearch_url, index_name, embedding, **kwargs)
|
||||||
|
Loading…
Reference in New Issue
Block a user