community[minor]: Improve InMemoryVectorStore with ability to persist to disk and filter on metadata. (#22186)

- **Description:** The InMemoryVectorStore is a nice and simple vector store implementation for quick development and debugging. The current implementation is quite limited in functionality. This PR extends it by adding utility functions to persist the vector store to a JSON file and to load it back from a JSON file. We chose the JSON file format because it allows inspection of the database contents in a text editor, which is great for debugging. Furthermore, it adds a `filter` keyword that can be used to filter documents by their `page_content` or `metadata`. - **Issue:** - - **Dependencies:** - - **Twitter handle:** @Vincent_Min
2025-09-18 16:16:33 +00:00 · 2024-06-05 16:40:34 +02:00
parent c34ad8c163
commit 59bef31997
2 changed files with 91 additions and 30 deletions
--- a/libs/community/tests/unit_tests/vectorstores/test_inmemory.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_inmemory.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 from langchain_core.documents import Document

 from langchain_community.vectorstores.inmemory import InMemoryVectorStore
@@ -44,3 +46,31 @@ async def test_inmemory_mmr() -> None:
    assert len(output) == len(texts)
    assert output[0] == Document(page_content="foo")
    assert output[1] == Document(page_content="foy")
+
+
+async def test_inmemory_dump_load(tmp_path: Path) -> None:
+    """Test that a dumped and reloaded store returns the same search results."""
+    embedding = ConsistentFakeEmbeddings()
+    store = await InMemoryVectorStore.afrom_texts(["foo", "bar", "baz"], embedding)
+    output = await store.asimilarity_search("foo", k=1)
+
+    test_file = str(tmp_path / "test.json")
+    store.dump(test_file)
+
+    loaded_store = InMemoryVectorStore.load(test_file, embedding)
+    loaded_output = await loaded_store.asimilarity_search("foo", k=1)
+
+    assert output == loaded_output
+
+
+async def test_inmemory_filter() -> None:
+    """Test that similarity search only returns documents accepted by `filter`."""
+    store = await InMemoryVectorStore.afrom_texts(
+        ["foo", "bar"],
+        ConsistentFakeEmbeddings(),
+        [{"id": 1}, {"id": 2}],
+    )
+    output = await store.asimilarity_search(
+        "baz", filter=lambda doc: doc.metadata["id"] == 1
+    )
+    assert output == [Document(page_content="foo", metadata={"id": 1})]