mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-25 08:57:48 +00:00
Add from_embeddings for opensearch (#10957)
This commit is contained in:
parent
73693c18fc
commit
db05ea2b78
@ -736,7 +736,7 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
ids: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> OpenSearchVectorSearch:
|
||||
"""Construct OpenSearchVectorSearch wrapper from raw documents.
|
||||
"""Construct OpenSearchVectorSearch wrapper from raw texts.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
@ -754,6 +754,74 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
and lucene engines recommended for large datasets. Also supports brute force
|
||||
search through Script Scoring and Painless Scripting.
|
||||
|
||||
Optional Args:
|
||||
vector_field: Document field embeddings are stored in. Defaults to
|
||||
"vector_field".
|
||||
|
||||
text_field: Document field the text of the document is stored in. Defaults
|
||||
to "text".
|
||||
|
||||
Optional Keyword Args for Approximate Search:
|
||||
engine: "nmslib", "faiss", "lucene"; default: "nmslib"
|
||||
|
||||
space_type: "l2", "l1", "cosinesimil", "linf", "innerproduct"; default: "l2"
|
||||
|
||||
ef_search: Size of the dynamic list used during k-NN searches. Higher values
|
||||
lead to more accurate but slower searches; default: 512
|
||||
|
||||
ef_construction: Size of the dynamic list used during k-NN graph creation.
|
||||
Higher values lead to more accurate graph but slower indexing speed;
|
||||
default: 512
|
||||
|
||||
m: Number of bidirectional links created for each new element. Large impact
|
||||
on memory consumption. Between 2 and 100; default: 16
|
||||
|
||||
Keyword Args for Script Scoring or Painless Scripting:
|
||||
is_appx_search: False
|
||||
|
||||
"""
|
||||
embeddings = embedding.embed_documents(texts)
|
||||
return cls.from_embeddings(
|
||||
embeddings,
|
||||
texts,
|
||||
embedding,
|
||||
metadatas=metadatas,
|
||||
bulk_size=bulk_size,
|
||||
ids=ids,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_embeddings(
|
||||
cls,
|
||||
embeddings: List[List[float]],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
bulk_size: int = 500,
|
||||
ids: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> OpenSearchVectorSearch:
|
||||
"""Construct OpenSearchVectorSearch wrapper from pre-vectorized embeddings.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain.vectorstores import OpenSearchVectorSearch
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
embedder = OpenAIEmbeddings()
|
||||
embeddings = embedder.embed_documents(["foo", "bar"])
|
||||
opensearch_vector_search = OpenSearchVectorSearch.from_embeddings(
|
||||
embeddings,
|
||||
["foo", "bar"],
|
||||
embedder,
|
||||
opensearch_url="http://localhost:9200"
|
||||
)
|
||||
|
||||
OpenSearch by default supports Approximate Search powered by nmslib, faiss
|
||||
and lucene engines recommended for large datasets. Also supports brute force
|
||||
search through Script Scoring and Painless Scripting.
|
||||
|
||||
Optional Args:
|
||||
vector_field: Document field embeddings are stored in. Defaults to
|
||||
"vector_field".
|
||||
@ -799,7 +867,6 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
"max_chunk_bytes",
|
||||
"is_aoss",
|
||||
]
|
||||
embeddings = embedding.embed_documents(texts)
|
||||
_validate_embeddings_and_bulk_size(len(embeddings), bulk_size)
|
||||
dim = len(embeddings[0])
|
||||
# Get the index name from either kwargs or ENV Variable
|
||||
@ -843,8 +910,8 @@ class OpenSearchVectorSearch(VectorStore):
|
||||
index_name,
|
||||
embeddings,
|
||||
texts,
|
||||
metadatas=metadatas,
|
||||
ids=ids,
|
||||
metadatas=metadatas,
|
||||
vector_field=vector_field,
|
||||
text_field=text_field,
|
||||
mapping=mapping,
|
||||
|
Loading…
Reference in New Issue
Block a user