mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 13:36:15 +00:00
adding MongoDBAtlasVectorSearch (#5338)
# Add MongoDBAtlasVectorSearch for the python library Fixes #5337 --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
committed by
GitHub
parent
c4b502a470
commit
a61b7f7e7c
@@ -22,4 +22,8 @@ PINECONE_ENVIRONMENT=us-west4-gcp
|
||||
# details here https://learn.microsoft.com/en-us/dotnet/api/azure.identity.defaultazurecredential?view=azure-dotnet
|
||||
POWERBI_DATASET_ID=_powerbi_dataset_id_here
|
||||
POWERBI_TABLE_NAME=_test_table_name_here
|
||||
POWERBI_NUMROWS=_num_rows_in_your_test_table
|
||||
POWERBI_NUMROWS=_num_rows_in_your_test_table
|
||||
|
||||
|
||||
# MongoDB Atlas Vector Search
|
||||
MONGODB_ATLAS_URI=your_mongodb_atlas_connection_string
|
135
tests/integration_tests/vectorstores/test_mongodb_atlas.py
Normal file
135
tests/integration_tests/vectorstores/test_mongodb_atlas.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""Test MongoDB Atlas Vector Search functionality."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from time import sleep
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
from langchain.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pymongo import MongoClient
|
||||
|
||||
# Name of the Atlas Search index the tests query.
INDEX_NAME = "langchain-test-index"

# Fully qualified "<database>.<collection>" target used by every test.
NAMESPACE = "langchain_test_db.langchain_test_collection"

# Connection string read from the environment; None when the variable is unset.
CONNECTION_STRING = os.getenv("MONGODB_ATLAS_URI")

# The two halves of NAMESPACE, used to address the collection directly.
DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
|
||||
|
||||
|
||||
def get_test_client() -> Optional[MongoClient]:
    """Build the MongoDB client shared by the integration tests.

    ``pymongo`` is imported inside the function, so a missing driver is
    reported as ``None`` rather than as an import error.

    Returns:
        A ``MongoClient`` created from ``CONNECTION_STRING``, or ``None`` when
        ``pymongo`` cannot be imported or the client cannot be constructed.
    """
    try:
        from pymongo import MongoClient

        # NOTE(review): CONNECTION_STRING is None when MONGODB_ATLAS_URI is
        # unset, and MongoClient is still called with it -- confirm that the
        # driver's default target is acceptable for these tests.
        client: MongoClient = MongoClient(CONNECTION_STRING)
        return client
    except Exception:
        # Deliberately best-effort: any failure yields None. ``Exception`` is
        # used instead of a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        return None
|
||||
|
||||
|
||||
# Instantiate the client once as a module-level constant rather than a pytest
# fixture, so that the tests do not need to open multiple connections.
|
||||
# Shared client for every test in this module; None when get_test_client()
# could not create one.
TEST_CLIENT: Optional[MongoClient] = get_test_client()
|
||||
|
||||
|
||||
class TestMongoDBAtlasVectorSearch:
    """Integration tests for ``MongoDBAtlasVectorSearch``.

    All tests go through the module-level ``TEST_CLIENT`` and operate on the
    ``DB_NAME``/``COLLECTION_NAME`` collection, querying ``INDEX_NAME``.
    """

    @classmethod
    def setup_class(cls) -> None:
        """Assert that the test collection is empty before any test runs."""
        # ensure the test collection is empty
        collection = TEST_CLIENT[DB_NAME][COLLECTION_NAME]  # type: ignore[index]
        assert collection.count_documents({}) == 0

    @classmethod
    def teardown_class(cls) -> None:
        """Remove every document from the test collection after the last test."""
        collection = TEST_CLIENT[DB_NAME][COLLECTION_NAME]  # type: ignore[index]
        collection.delete_many({})

    @pytest.fixture(autouse=True)
    def setup(self) -> None:
        """Remove every document from the test collection before each test."""
        collection = TEST_CLIENT[DB_NAME][COLLECTION_NAME]  # type: ignore[index]
        collection.delete_many({})

    def test_from_documents(self, embedding_openai: Embeddings) -> None:
        """Test end to end construction and search."""
        docs = [
            Document(page_content="Dogs are tough.", metadata={"a": 1}),
            Document(page_content="Cats have fluff.", metadata={"b": 1}),
            Document(page_content="What is a sandwich?", metadata={"c": 1}),
            Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
        ]
        store = MongoDBAtlasVectorSearch.from_documents(
            docs,
            embedding_openai,
            client=TEST_CLIENT,
            namespace=NAMESPACE,
            index_name=INDEX_NAME,
        )
        # Give mongot time to update Lucene's index before querying.
        sleep(1)
        results = store.similarity_search("Sandwich", k=1)
        top_hit = results[0]
        assert top_hit.page_content == "What is a sandwich?"
        assert top_hit.metadata["c"] == 1

    def test_from_texts(self, embedding_openai: Embeddings) -> None:
        """Build the store from plain texts and check the best match."""
        corpus = [
            "Dogs are tough.",
            "Cats have fluff.",
            "What is a sandwich?",
            "That fence is purple.",
        ]
        store = MongoDBAtlasVectorSearch.from_texts(
            corpus,
            embedding_openai,
            client=TEST_CLIENT,
            namespace=NAMESPACE,
            index_name=INDEX_NAME,
        )
        # Give mongot time to update Lucene's index before querying.
        sleep(1)
        results = store.similarity_search("Sandwich", k=1)
        top_hit = results[0]
        assert top_hit.page_content == "What is a sandwich?"

    def test_from_texts_with_metadatas(self, embedding_openai: Embeddings) -> None:
        """Build the store from texts plus metadata and check both come back."""
        corpus = [
            "Dogs are tough.",
            "Cats have fluff.",
            "What is a sandwich?",
            "The fence is purple.",
        ]
        corpus_metadata = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
        store = MongoDBAtlasVectorSearch.from_texts(
            corpus,
            embedding_openai,
            metadatas=corpus_metadata,
            client=TEST_CLIENT,
            namespace=NAMESPACE,
            index_name=INDEX_NAME,
        )
        # Give mongot time to update Lucene's index before querying.
        sleep(1)
        results = store.similarity_search("Sandwich", k=1)
        top_hit = results[0]
        assert top_hit.page_content == "What is a sandwich?"
        assert top_hit.metadata["c"] == 1

    def test_from_texts_with_metadatas_and_pre_filter(
        self, embedding_openai: Embeddings
    ) -> None:
        """Search with a ``pre_filter`` and expect an empty result list."""
        corpus = [
            "Dogs are tough.",
            "Cats have fluff.",
            "What is a sandwich?",
            "The fence is purple.",
        ]
        corpus_metadata = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
        store = MongoDBAtlasVectorSearch.from_texts(
            corpus,
            embedding_openai,
            metadatas=corpus_metadata,
            client=TEST_CLIENT,
            namespace=NAMESPACE,
            index_name=INDEX_NAME,
        )
        # Give mongot time to update Lucene's index before querying.
        sleep(1)
        range_filter = {"range": {"lte": 0, "path": "c"}}
        results = store.similarity_search("Sandwich", k=1, pre_filter=range_filter)
        assert results == []
|
Reference in New Issue
Block a user