mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-18 16:16:33 +00:00
community[minor]: Improve InMemoryVectorStore with ability to persist to disk and filter on metadata. (#22186)
- **Description:** The InMemoryVectorStore is a nice and simple vector store implementation for quick development and debugging. The current implementation is quite limited in functionality. This PR extends it by adding utility functions to persist the vector store to a JSON file and to load it back from a JSON file. We chose the JSON file format because it allows inspection of the database contents in a text editor, which is great for debugging. Furthermore, it adds a `filter` keyword argument that can be used to filter documents by their `page_content` or `metadata`. - **Issue:** - - **Dependencies:** - - **Twitter handle:** @Vincent_Min
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
from pathlib import Path
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.vectorstores.inmemory import InMemoryVectorStore
|
||||
@@ -44,3 +46,31 @@ async def test_inmemory_mmr() -> None:
|
||||
assert len(output) == len(texts)
|
||||
assert output[0] == Document(page_content="foo")
|
||||
assert output[1] == Document(page_content="foy")
|
||||
|
||||
|
||||
async def test_inmemory_dump_load(tmp_path: Path) -> None:
    """Test that a store dumped to a JSON file can be loaded back unchanged.

    Builds a store, persists it with ``dump``, reloads it with ``load`` and
    checks that the same query returns the same result from both stores.

    Args:
        tmp_path: Per-test temporary directory supplied by pytest.
    """
    embedding = ConsistentFakeEmbeddings()
    store = await InMemoryVectorStore.afrom_texts(["foo", "bar", "baz"], embedding)
    output = await store.asimilarity_search("foo", k=1)
    # Guard against a vacuous pass: two empty result lists would compare equal.
    assert len(output) == 1

    test_file = str(tmp_path / "test.json")
    store.dump(test_file)
    # Fail with a clear message here rather than inside ``load`` if nothing
    # was written to disk.
    assert Path(test_file).is_file()

    loaded_store = InMemoryVectorStore.load(test_file, embedding)
    loaded_output = await loaded_store.asimilarity_search("foo", k=1)

    assert output == loaded_output
|
||||
|
||||
|
||||
async def test_inmemory_filter() -> None:
    """Test that ``filter`` restricts search results to matching documents."""
    texts = ["foo", "bar"]
    metadatas = [{"id": 1}, {"id": 2}]
    store = await InMemoryVectorStore.afrom_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas,
    )

    def has_first_id(doc: Document) -> bool:
        # Keep only the document whose metadata carries id 1.
        return doc.metadata["id"] == 1

    output = await store.asimilarity_search("baz", filter=has_first_id)

    expected = [Document(page_content="foo", metadata={"id": 1})]
    assert output == expected
|
||||
|
Reference in New Issue
Block a user