adding MongoDBAtlasVectorSearch (#5338)

# Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
2025-09-13 13:36:15 +00:00 · 2023-05-30 16:59:01 +02:00
parent c4b502a470
commit a61b7f7e7c
7 changed files with 588 additions and 5 deletions
--- a/tests/integration_tests/.env.example
+++ b/tests/integration_tests/.env.example
@@ -22,4 +22,8 @@ PINECONE_ENVIRONMENT=us-west4-gcp
 # details here https://learn.microsoft.com/en-us/dotnet/api/azure.identity.defaultazurecredential?view=azure-dotnet
 POWERBI_DATASET_ID=_powerbi_dataset_id_here
 POWERBI_TABLE_NAME=_test_table_name_here
-POWERBI_NUMROWS=_num_rows_in_your_test_table
+POWERBI_NUMROWS=_num_rows_in_your_test_table
+
+
+# MongoDB Atlas Vector Search
+MONGODB_ATLAS_URI=your_mongodb_atlas_connection_string
--- a/tests/integration_tests/vectorstores/test_mongodb_atlas.py
+++ b/tests/integration_tests/vectorstores/test_mongodb_atlas.py
@@ -0,0 +1,135 @@
+"""Test MongoDB Atlas Vector Search functionality."""
+from __future__ import annotations
+
+import os
+from time import sleep
+from typing import TYPE_CHECKING, Optional
+
+import pytest
+
+from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
+from langchain.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch
+
+if TYPE_CHECKING:
+    from pymongo import MongoClient
+
+INDEX_NAME = "langchain-test-index"  # passed as index_name= to the vector store
+NAMESPACE = "langchain_test_db.langchain_test_collection"  # "<db>.<collection>"
+CONNECTION_STRING = os.environ.get("MONGODB_ATLAS_URI")  # None when unset
+DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
+
+
+def get_test_client() -> Optional[MongoClient]:
+    try:
+        from pymongo import MongoClient
+
+        client: MongoClient = MongoClient(CONNECTION_STRING)
+        return client
+    except:  # noqa: E722
+        return None
+
+
+# Instantiate as a module-level constant instead of a pytest fixture to avoid
+# making multiple connections.
+TEST_CLIENT = get_test_client()
+
+
+class TestMongoDBAtlasVectorSearch:
+    @classmethod
+    def setup_class(cls) -> None:
+        # insure the test collection is empty
+        assert TEST_CLIENT[DB_NAME][COLLECTION_NAME].count_documents({}) == 0  # type: ignore[index]  # noqa: E501
+
+    @classmethod
+    def teardown_class(cls) -> None:
+        # delete all the documents in the collection
+        TEST_CLIENT[DB_NAME][COLLECTION_NAME].delete_many({})  # type: ignore[index]
+
+    @pytest.fixture(autouse=True)
+    def setup(self) -> None:
+        # delete all the documents in the collection
+        TEST_CLIENT[DB_NAME][COLLECTION_NAME].delete_many({})  # type: ignore[index]
+
+    def test_from_documents(self, embedding_openai: Embeddings) -> None:
+        """Test end to end construction and search."""
+        documents = [
+            Document(page_content="Dogs are tough.", metadata={"a": 1}),
+            Document(page_content="Cats have fluff.", metadata={"b": 1}),
+            Document(page_content="What is a sandwich?", metadata={"c": 1}),
+            Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
+        ]
+        vectorstore = MongoDBAtlasVectorSearch.from_documents(
+            documents,
+            embedding_openai,
+            client=TEST_CLIENT,
+            namespace=NAMESPACE,
+            index_name=INDEX_NAME,
+        )
+        sleep(1)  # waits for mongot to update Lucene's index
+        output = vectorstore.similarity_search("Sandwich", k=1)
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+
+    def test_from_texts(self, embedding_openai: Embeddings) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "That fence is purple.",
+        ]
+        vectorstore = MongoDBAtlasVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            client=TEST_CLIENT,
+            namespace=NAMESPACE,
+            index_name=INDEX_NAME,
+        )
+        sleep(1)  # waits for mongot to update Lucene's index
+        output = vectorstore.similarity_search("Sandwich", k=1)
+        assert output[0].page_content == "What is a sandwich?"
+
+    def test_from_texts_with_metadatas(self, embedding_openai: Embeddings) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        vectorstore = MongoDBAtlasVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            client=TEST_CLIENT,
+            namespace=NAMESPACE,
+            index_name=INDEX_NAME,
+        )
+        sleep(1)  # waits for mongot to update Lucene's index
+        output = vectorstore.similarity_search("Sandwich", k=1)
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+
+    def test_from_texts_with_metadatas_and_pre_filter(
+        self, embedding_openai: Embeddings
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        vectorstore = MongoDBAtlasVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            client=TEST_CLIENT,
+            namespace=NAMESPACE,
+            index_name=INDEX_NAME,
+        )
+        sleep(1)  # waits for mongot to update Lucene's index
+        output = vectorstore.similarity_search(
+            "Sandwich", k=1, pre_filter={"range": {"lte": 0, "path": "c"}}
+        )
+        assert output == []