diff --git a/libs/community/langchain_community/vectorstores/opensearch_vector_search.py b/libs/community/langchain_community/vectorstores/opensearch_vector_search.py index da5d38db6dc..f1329659eec 100644 --- a/libs/community/langchain_community/vectorstores/opensearch_vector_search.py +++ b/libs/community/langchain_community/vectorstores/opensearch_vector_search.py @@ -16,7 +16,7 @@ IMPORT_OPENSEARCH_PY_ERROR = ( "Could not import OpenSearch. Please install it with `pip install opensearch-py`." ) IMPORT_ASYNC_OPENSEARCH_PY_ERROR = """ -Could not import AsyncOpenSearch. +Could not import AsyncOpenSearch. Please install it with `pip install opensearch-py`.""" SCRIPT_SCORING_SEARCH = "script_scoring" @@ -902,6 +902,7 @@ class OpenSearchVectorSearch(VectorStore): if metadata_field == "*" or metadata_field not in hit["_source"] else hit["_source"][metadata_field] ), + id=hit["_id"], ), hit["_score"], ) @@ -1099,6 +1100,7 @@ class OpenSearchVectorSearch(VectorStore): Document( page_content=results[i]["_source"][text_field], metadata=results[i]["_source"][metadata_field], + id=results[i]["_id"], ) for i in mmr_selected ] diff --git a/libs/community/tests/integration_tests/vectorstores/test_opensearch.py b/libs/community/tests/integration_tests/vectorstores/test_opensearch.py index 37dae734605..22bff520073 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_opensearch.py +++ b/libs/community/tests/integration_tests/vectorstores/test_opensearch.py @@ -15,15 +15,19 @@ from tests.integration_tests.vectorstores.fake_embeddings import ( DEFAULT_OPENSEARCH_URL = "http://localhost:9200" texts = ["foo", "bar", "baz"] +ids = ["id_foo", "id_bar", "id_baz"] def test_opensearch() -> None: """Test end to end indexing and search using Approximate Search.""" docsearch = OpenSearchVectorSearch.from_texts( - texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL + texts, + FakeEmbeddings(), + opensearch_url=DEFAULT_OPENSEARCH_URL, + ids=ids, ) output = docsearch.similarity_search("foo", k=1) - assert output == [Document(page_content="foo")] + assert output == [Document(page_content="foo", id="id_foo")] def test_similarity_search_with_score() -> None: @@ -34,11 +38,12 @@ def test_similarity_search_with_score() -> None: FakeEmbeddings(), metadatas=metadatas, opensearch_url=DEFAULT_OPENSEARCH_URL, + ids=ids, ) output = docsearch.similarity_search_with_score("foo", k=2) assert output == [ - (Document(page_content="foo", metadata={"page": 0}), 1.0), - (Document(page_content="bar", metadata={"page": 1}), 0.5), + (Document(page_content="foo", metadata={"page": 0}, id="id_foo"), 1.0), + (Document(page_content="bar", metadata={"page": 1}, id="id_bar"), 0.5), ] @@ -50,20 +55,24 @@ def test_opensearch_with_custom_field_name() -> None: opensearch_url=DEFAULT_OPENSEARCH_URL, vector_field="my_vector", text_field="custom_text", + ids=ids, ) output = docsearch.similarity_search( "foo", k=1, vector_field="my_vector", text_field="custom_text" ) - assert output == [Document(page_content="foo")] + assert output == [Document(page_content="foo", id="id_foo")] text_input = ["test", "add", "text", "method"] OpenSearchVectorSearch.add_texts( - docsearch, text_input, vector_field="my_vector", text_field="custom_text" + docsearch, + text_input, + vector_field="my_vector", + text_field="custom_text", ) output = docsearch.similarity_search( "add", k=1, vector_field="my_vector", text_field="custom_text" ) - assert output == [Document(page_content="foo")] + assert output == [Document(page_content="foo", id="id_foo")] def test_opensearch_with_metadatas() -> None: @@ -74,9 +83,22 @@ def test_opensearch_with_metadatas() -> None: FakeEmbeddings(), metadatas=metadatas, opensearch_url=DEFAULT_OPENSEARCH_URL, + ids=ids, ) output = docsearch.similarity_search("foo", k=1) - assert output == [Document(page_content="foo", metadata={"page": 0})] + assert output == [Document(page_content="foo", metadata={"page": 0}, id="id_foo")] + + +def test_max_marginal_relevance_search() -> None: + """Test end to end indexing and mmr search.""" + docsearch = OpenSearchVectorSearch.from_texts( + texts, + FakeEmbeddings(), + opensearch_url=DEFAULT_OPENSEARCH_URL, + ids=ids, + ) + output = docsearch.max_marginal_relevance_search("foo", k=1) + assert output == [Document(page_content="foo", id="id_foo")] def test_add_text() -> None: @@ -86,8 +108,8 @@ def test_add_text() -> None: docsearch = OpenSearchVectorSearch.from_texts( texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL ) - docids = OpenSearchVectorSearch.add_texts(docsearch, text_input, metadatas) - assert len(docids) == len(text_input) + doc_ids = OpenSearchVectorSearch.add_texts(docsearch, text_input, metadatas) + assert len(doc_ids) == len(text_input) def test_add_embeddings() -> None: @@ -112,7 +134,8 @@ def test_add_embeddings() -> None: ) docsearch.add_embeddings(list(zip(text_input, embedding_vectors)), metadatas) output = docsearch.similarity_search("foo1", k=1) - assert output == [Document(page_content="foo3", metadata={"page": 2})] + assert output[0].page_content == "foo3" + assert output[0].metadata == {"page": 2} def test_opensearch_script_scoring() -> None: @@ -127,7 +150,8 @@ def test_opensearch_script_scoring() -> None: output = docsearch.similarity_search( "foo", k=1, search_type=SCRIPT_SCORING_SEARCH, pre_filter=pre_filter_val ) - assert output == [Document(page_content="bar")] + assert output[0].page_content == "bar" + assert output[0].id is not None def test_add_text_script_scoring() -> None: @@ -144,7 +168,8 @@ def test_add_text_script_scoring() -> None: output = docsearch.similarity_search( "add", k=1, search_type=SCRIPT_SCORING_SEARCH, space_type="innerproduct" ) - assert output == [Document(page_content="test")] + assert output[0].page_content == "test" + assert output[0].id is not None def test_opensearch_painless_scripting() -> None: @@ -159,7 +184,8 @@ def test_opensearch_painless_scripting() -> None: output = docsearch.similarity_search( "foo", k=1, search_type=PAINLESS_SCRIPTING_SEARCH, pre_filter=pre_filter_val ) - assert output == [Document(page_content="baz")] + assert output[0].page_content == "baz" + assert output[0].id is not None def test_add_text_painless_scripting() -> None: @@ -176,7 +202,8 @@ def test_add_text_painless_scripting() -> None: output = docsearch.similarity_search( "add", k=1, search_type=PAINLESS_SCRIPTING_SEARCH, space_type="cosineSimilarity" ) - assert output == [Document(page_content="test")] + assert output[0].page_content == "test" + assert output[0].id is not None def test_opensearch_invalid_search_type() -> None: @@ -207,7 +234,8 @@ def test_appx_search_with_boolean_filter() -> None: output = docsearch.similarity_search( "foo", k=3, boolean_filter=boolean_filter_val, subquery_clause="should" ) - assert output == [Document(page_content="bar")] + assert output[0].page_content == "bar" + assert output[0].id is not None def test_appx_search_with_lucene_filter() -> None: @@ -217,7 +245,8 @@ def test_appx_search_with_lucene_filter() -> None: texts, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL, engine="lucene" ) output = docsearch.similarity_search("foo", k=3, lucene_filter=lucene_filter_val) - assert output == [Document(page_content="bar")] + assert output[0].page_content == "bar" + assert output[0].id is not None def test_opensearch_with_custom_field_name_appx_true() -> None: @@ -230,7 +259,8 @@ def test_opensearch_with_custom_field_name_appx_true() -> None: is_appx_search=True, ) output = docsearch.similarity_search("add", k=1) - assert output == [Document(page_content="add")] + assert output[0].page_content == "add" + assert output[0].id is not None def test_opensearch_with_custom_field_name_appx_false() -> None: @@ -240,7 +270,8 @@ def test_opensearch_with_custom_field_name_appx_false() -> None: text_input, FakeEmbeddings(), opensearch_url=DEFAULT_OPENSEARCH_URL ) output = docsearch.similarity_search("add", k=1) - assert output == [Document(page_content="add")] + assert output[0].page_content == "add" + assert output[0].id is not None def test_opensearch_serverless_with_scripting_search_indexing_throws_error() -> None: @@ -338,4 +369,5 @@ def test_appx_search_with_faiss_efficient_filter() -> None: output = docsearch.similarity_search( "foo", k=3, efficient_filter=efficient_filter_val ) - assert output == [Document(page_content="bar")] + assert output[0].page_content == "bar" + assert output[0].id is not None