add mongodb_store (#13801)

# Add MongoDB storage
  - **Description:** 
  Add MongoDB storage as an option for storing large documents (a document store).

Example usage: 
```Python
# Instantiate the MongoDBStore with a MongoDB connection string
from langchain_community.storage import MongoDBStore
from langchain_core.documents import Document

mongo_conn_str = "mongodb://localhost:27017/"
mongodb_store = MongoDBStore(mongo_conn_str, db_name="test-db",
                                collection_name="test-collection")

# Set values for keys
doc1 = Document(page_content='test1')
doc2 = Document(page_content='test2')
mongodb_store.mset([("key1", doc1), ("key2", doc2)])

# Get values for keys
values = mongodb_store.mget(["key1", "key2"])
# [doc1, doc2]

# Iterate over keys
for key in mongodb_store.yield_keys():
    print(key)

# Delete keys
mongodb_store.mdelete(["key1", "key2"])
 ```

  - **Dependencies:**
  Uses `mongomock` for the integration tests.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
Qihui Xie
2024-02-14 11:33:22 +08:00
committed by GitHub
parent 50b48a8e6a
commit 5738143d4b
5 changed files with 213 additions and 0 deletions

View File

@@ -0,0 +1,73 @@
from typing import Generator
import pytest
from langchain_core.documents import Document
from langchain_community.storage.mongodb import MongoDBStore
pytest.importorskip("pymongo")
@pytest.fixture
def mongo_store() -> Generator:
    """Yield a ``MongoDBStore`` that runs against a mongomock-patched server.

    The store is created with the connection string
    ``mongodb://localhost:27017/``, database ``test_db`` and collection
    ``test_collection``. The mongomock patch stays active for as long as
    the test using this fixture runs and is undone afterwards.
    """
    # Imported lazily so that merely collecting this module does not
    # require mongomock to be installed.
    import mongomock

    # mongomock stands in for a MongoDB instance at localhost:27017, so the
    # tests do not need a real database.
    mocked_server = mongomock.patch(servers=(("localhost", 27017),))
    with mocked_server:
        store = MongoDBStore(
            "mongodb://localhost:27017/", "test_db", "test_collection"
        )
        yield store
def test_mset_and_mget(mongo_store: MongoDBStore) -> None:
    """Documents stored with ``mset`` are returned by ``mget`` in key order."""
    first = Document(page_content="doc1")
    second = Document(page_content="doc2")

    # Write both documents in a single call.
    mongo_store.mset([("key1", first), ("key2", second)])

    # Read them back using the same keys, in the same order.
    fetched = mongo_store.mget(["key1", "key2"])

    # Each slot must hold a document carrying the content that was stored.
    for position, expected_content in enumerate(("doc1", "doc2")):
        doc = fetched[position]
        assert doc and doc.page_content == expected_content
def test_yield_keys(mongo_store: MongoDBStore) -> None:
    """``yield_keys`` lists every key, or only those matching ``prefix``."""
    contents_by_key = (
        ("key1", "doc1"),
        ("key2", "doc2"),
        ("another_key", "other"),
    )
    mongo_store.mset(
        [(key, Document(page_content=text)) for key, text in contents_by_key]
    )

    # Without a prefix, all three stored keys are expected.
    all_keys = set(mongo_store.yield_keys())
    assert all_keys == {"key1", "key2", "another_key"}

    # With prefix="key", "another_key" must not be reported.
    prefixed_keys = set(mongo_store.yield_keys(prefix="key"))
    assert prefixed_keys == {"key1", "key2"}
def test_mdelete(mongo_store: MongoDBStore) -> None:
    """``mdelete`` removes exactly the requested keys from the store."""
    seed = [
        ("key1", Document(page_content="doc1")),
        ("key2", Document(page_content="doc2")),
    ]
    mongo_store.mset(seed)

    # Removing "key1" must leave "key2" untouched.
    mongo_store.mdelete(["key1"])
    keys_after_first_delete = list(mongo_store.yield_keys())
    assert "key1" not in keys_after_first_delete
    assert "key2" in keys_after_first_delete

    # Removing the last remaining key must leave the store empty.
    mongo_store.mdelete(["key2"])
    keys_after_second_delete = list(mongo_store.yield_keys())
    assert not keys_after_second_delete
def test_init_errors() -> None:
    """Constructing ``MongoDBStore`` from three empty strings raises ``ValueError``."""
    # Same three positional arguments the fixture passes (connection string,
    # database name, collection name), all left empty here.
    empty_args = ("", "", "")
    with pytest.raises(ValueError):
        MongoDBStore(*empty_args)

View File

@@ -3,6 +3,7 @@ from langchain_community.storage import __all__
EXPECTED_ALL = [
"AstraDBStore",
"AstraDBByteStore",
"MongoDBStore",
"RedisStore",
"UpstashRedisByteStore",
"UpstashRedisStore",

View File

@@ -0,0 +1,11 @@
"""Lightweight unit test that attempts to import MongoDBStore.
The actual code is tested in integration tests.
This test is intended to catch errors in the import process.
"""
def test_import_storage() -> None:
    """Smoke test: importing ``MongoDBStore`` from its module must succeed.

    Nothing is asserted beyond the import itself; the test fails only if
    the import statement raises.
    """
    # The imported name is intentionally unused, hence the lint suppression.
    from langchain_community.storage.mongodb import MongoDBStore  # noqa