core[minor],community[patch],standard-tests[patch]: Move InMemoryImplementation to langchain-core (#23986)

This PR moves the in-memory vector store implementation (`InMemoryVectorStore`) to langchain-core.

* The implementation remains importable from langchain-community.
* Supporting utilities are marked as private for now.
This commit is contained in:
Eugene Yurtsev 2024-07-08 17:11:51 -04:00 committed by GitHub
parent aa8c9bb4a9
commit f765e8fa9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 515 additions and 259 deletions

View File

@ -1,249 +1,5 @@
import json
import uuid
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
from langchain_core.vectorstores import InMemoryVectorStore
import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.indexing import UpsertResponse
from langchain_core.load import dumpd, load
from langchain_core.vectorstores import VectorStore
from langchain_community.utils.math import cosine_similarity
from langchain_community.vectorstores.utils import maximal_marginal_relevance
class InMemoryVectorStore(VectorStore):
    """Vector store that keeps every entry in a plain in-process dictionary.

    Search ranks the stored entries by cosine similarity (computed with numpy)
    to the query vector.

    Args:
        embedding: embedding function to use.
    """

    def __init__(self, embedding: Embeddings) -> None:
        # Maps document id -> {"id", "vector", "text", "metadata"}.
        self.store: Dict[str, Dict[str, Any]] = {}
        self.embedding = embedding

    @property
    def embeddings(self) -> Embeddings:
        """Embedding function this store was constructed with."""
        return self.embedding

    def delete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
        """Remove the entries with the given ids; unknown ids are ignored."""
        for doc_id in ids or ():
            self.store.pop(doc_id, None)

    async def adelete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
        """Async wrapper that calls the synchronous ``delete`` directly."""
        self.delete(ids)

    def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
        """Embed the documents and insert or overwrite them keyed by id.

        Documents without an id receive a freshly generated UUID4 string.
        """
        texts = [document.page_content for document in items]
        vectors = self.embedding.embed_documents(texts)

        stored_ids = []
        for document, vector in zip(items, vectors):
            key = document.id or str(uuid.uuid4())
            stored_ids.append(key)
            self.store[key] = {
                "id": key,
                "vector": vector,
                "text": document.page_content,
                "metadata": document.metadata,
            }
        return {"succeeded": stored_ids, "failed": []}

    def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
        """Return the stored documents for the given ids, skipping missing ones."""
        found = []
        for doc_id in ids:
            entry = self.store.get(doc_id)
            if not entry:
                continue
            found.append(
                Document(
                    id=entry["id"],
                    page_content=entry["text"],
                    metadata=entry["metadata"],
                )
            )
        return found

    async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
        """Async wrapper that calls the synchronous ``get_by_ids`` directly."""
        return self.get_by_ids(ids)

    async def aadd_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Async wrapper that delegates to ``add_texts``."""
        return self.add_texts(texts, metadatas, **kwargs)

    def _similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Callable[[Document], bool]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float, List[float]]]:
        """Return up to ``k`` (document, similarity, vector) triples, best first.

        When ``filter`` is given, only documents it accepts are returned.
        """
        scored = []
        for entry in self.store.values():
            entry_vector = entry["vector"]
            score = float(cosine_similarity([embedding], [entry_vector]).item(0))
            document = Document(
                id=entry["id"], page_content=entry["text"], metadata=entry["metadata"]
            )
            scored.append((document, score, entry_vector))

        # sorted() is stable, so equally scored entries keep insertion order.
        ranked = sorted(scored, key=lambda hit: hit[1], reverse=True)
        if filter is not None:
            ranked = [hit for hit in ranked if filter(hit[0])]
        return ranked[:k]

    def similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Callable[[Document], bool]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return up to ``k`` (document, similarity) pairs for a query vector."""
        hits = self._similarity_search_with_score_by_vector(
            embedding=embedding, k=k, filter=filter, **kwargs
        )
        return [(document, score) for document, score, _vector in hits]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Embed ``query`` and return up to ``k`` (document, similarity) pairs."""
        query_vector = self.embedding.embed_query(query)
        return self.similarity_search_with_score_by_vector(
            query_vector,
            k,
            **kwargs,
        )

    async def asimilarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Tuple[Document, float]]:
        """Async wrapper around ``similarity_search_with_score``."""
        return self.similarity_search_with_score(query, k, **kwargs)

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        **kwargs: Any,
    ) -> List[Document]:
        """Return up to ``k`` documents most similar to a query vector."""
        scored_hits = self.similarity_search_with_score_by_vector(
            embedding,
            k,
            **kwargs,
        )
        return [document for document, _score in scored_hits]

    async def asimilarity_search_by_vector(
        self, embedding: List[float], k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Async wrapper around ``similarity_search_by_vector``."""
        return self.similarity_search_by_vector(embedding, k, **kwargs)

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return up to ``k`` documents most similar to the query text."""
        scored_hits = self.similarity_search_with_score(query, k, **kwargs)
        return [document for document, _score in scored_hits]

    async def asimilarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Async wrapper around ``similarity_search``."""
        return self.similarity_search(query, k, **kwargs)

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        **kwargs: Any,
    ) -> List[Document]:
        """Pick up to ``k`` documents by maximal marginal relevance.

        The ``fetch_k`` most similar entries are fetched first and the final
        selection is made among them.
        """
        candidates = self._similarity_search_with_score_by_vector(
            embedding=embedding,
            k=fetch_k,
            **kwargs,
        )
        query_vector = np.array(embedding, dtype=np.float32)
        candidate_vectors = [vector for _, _, vector in candidates]
        chosen_positions = maximal_marginal_relevance(
            query_vector,
            candidate_vectors,
            k=k,
            lambda_mult=lambda_mult,
        )
        return [candidates[position][0] for position in chosen_positions]

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        **kwargs: Any,
    ) -> List[Document]:
        """Embed ``query`` and run the vector-based MMR search."""
        query_vector = self.embedding.embed_query(query)
        return self.max_marginal_relevance_search_by_vector(
            query_vector,
            k,
            fetch_k,
            lambda_mult=lambda_mult,
            **kwargs,
        )

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> "InMemoryVectorStore":
        """Create a store and populate it with the given texts."""
        instance = cls(
            embedding=embedding,
        )
        instance.add_texts(texts=texts, metadatas=metadatas, **kwargs)
        return instance

    @classmethod
    async def afrom_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> "InMemoryVectorStore":
        """Async wrapper around ``from_texts``."""
        return cls.from_texts(texts, embedding, metadatas, **kwargs)

    @classmethod
    def load(
        cls, path: str, embedding: Embeddings, **kwargs: Any
    ) -> "InMemoryVectorStore":
        """Rebuild a store from a JSON file previously written by ``dump``."""
        source = Path(path)
        with source.open("r") as f:
            # ``load`` here is the module-level deserializer, not this classmethod.
            restored = load(json.load(f))
        vectorstore = cls(embedding=embedding, **kwargs)
        vectorstore.store = restored
        return vectorstore

    def dump(self, path: str) -> None:
        """Serialize the store to a JSON file, creating parent directories."""
        target = Path(path)
        target.parent.mkdir(exist_ok=True, parents=True)
        with target.open("w") as f:
            json.dump(dumpd(self.store), f, indent=2)
__all__ = [
"InMemoryVectorStore",
]

View File

@ -0,0 +1,9 @@
from langchain_core.vectorstores.base import VST, VectorStore, VectorStoreRetriever
from langchain_core.vectorstores.in_memory import InMemoryVectorStore
__all__ = [
"VectorStore",
"VST",
"VectorStoreRetriever",
"InMemoryVectorStore",
]

View File

@ -0,0 +1,275 @@
from __future__ import annotations
import json
import uuid
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
Iterable,
List,
Optional,
Sequence,
Tuple,
)
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.load import dumpd, load
from langchain_core.vectorstores import VectorStore
from langchain_core.vectorstores.utils import _cosine_similarity as cosine_similarity
from langchain_core.vectorstores.utils import (
_maximal_marginal_relevance as maximal_marginal_relevance,
)
if TYPE_CHECKING:
from langchain_core.indexing import UpsertResponse
class InMemoryVectorStore(VectorStore):
    """In-memory implementation of VectorStore using a dictionary.

    Uses numpy to compute cosine similarity for search.

    Args:
        embedding: embedding function to use.
    """

    def __init__(self, embedding: Embeddings) -> None:
        """Initialize with the given embedding function."""
        # TODO: would be nice to change to
        # Dict[str, Document] at some point (will be a breaking change)
        # Maps document id -> {"id", "vector", "text", "metadata"}.
        self.store: Dict[str, Dict[str, Any]] = {}
        self.embedding = embedding

    @property
    def embeddings(self) -> Embeddings:
        """Embedding function this store was constructed with."""
        return self.embedding

    def delete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
        """Remove the entries with the given ids.

        Ids that are not in the store are ignored; passing no ids is a no-op.
        """
        if ids:
            for _id in ids:
                self.store.pop(_id, None)

    async def adelete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
        """Async wrapper around ``delete``; runs the synchronous code directly."""
        # Forward kwargs like the other async wrappers do, so a subclass that
        # overrides ``delete`` with extra options still receives them.
        self.delete(ids, **kwargs)

    def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
        """Embed the documents and insert or overwrite them keyed by id.

        Documents without an id receive a freshly generated UUID4 string.

        Returns:
            A response listing every stored id under ``"succeeded"``;
            ``"failed"`` is always empty.
        """
        vectors = self.embedding.embed_documents([item.page_content for item in items])
        ids = []
        for item, vector in zip(items, vectors):
            doc_id = item.id if item.id else str(uuid.uuid4())
            ids.append(doc_id)
            self.store[doc_id] = {
                "id": doc_id,
                "vector": vector,
                "text": item.page_content,
                "metadata": item.metadata,
            }
        return {
            "succeeded": ids,
            "failed": [],
        }

    def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
        """Get documents by their ids, skipping ids that are not stored."""
        documents = []
        for doc_id in ids:
            doc = self.store.get(doc_id)
            if doc:
                documents.append(
                    Document(
                        id=doc["id"],
                        page_content=doc["text"],
                        metadata=doc["metadata"],
                    )
                )
        return documents

    async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
        """Async wrapper around ``get_by_ids``."""
        return self.get_by_ids(ids)

    async def aadd_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Async wrapper that delegates to ``add_texts``."""
        return self.add_texts(texts, metadatas, **kwargs)

    def _similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Callable[[Document], bool]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float, List[float]]]:
        """Return up to ``k`` (document, similarity, vector) triples, best first.

        Args:
            embedding: Query vector.
            k: Maximum number of results.
            filter: Optional predicate; only documents it accepts are returned.

        Returns:
            Triples sorted by descending cosine similarity to ``embedding``.
        """
        result = []
        for doc in self.store.values():
            document = Document(
                id=doc["id"], page_content=doc["text"], metadata=doc["metadata"]
            )
            # Filter first so rejected entries never pay for a similarity
            # computation. The returned set and order are the same as when
            # filtering after the sort, because the sort below is stable.
            if filter is not None and not filter(document):
                continue
            vector = doc["vector"]
            similarity = float(cosine_similarity([embedding], [vector]).item(0))
            result.append((document, similarity, vector))
        result.sort(key=lambda x: x[1], reverse=True)
        return result[:k]

    def similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[Callable[[Document], bool]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return up to ``k`` (document, similarity) pairs for a query vector."""
        return [
            (doc, similarity)
            for doc, similarity, _ in self._similarity_search_with_score_by_vector(
                embedding=embedding, k=k, filter=filter, **kwargs
            )
        ]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Embed ``query`` and return up to ``k`` (document, similarity) pairs."""
        embedding = self.embedding.embed_query(query)
        docs = self.similarity_search_with_score_by_vector(
            embedding,
            k,
            **kwargs,
        )
        return docs

    async def asimilarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Tuple[Document, float]]:
        """Async wrapper around ``similarity_search_with_score``."""
        return self.similarity_search_with_score(query, k, **kwargs)

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        **kwargs: Any,
    ) -> List[Document]:
        """Return up to ``k`` documents most similar to a query vector."""
        docs_and_scores = self.similarity_search_with_score_by_vector(
            embedding,
            k,
            **kwargs,
        )
        return [doc for doc, _ in docs_and_scores]

    async def asimilarity_search_by_vector(
        self, embedding: List[float], k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Async wrapper around ``similarity_search_by_vector``."""
        return self.similarity_search_by_vector(embedding, k, **kwargs)

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return up to ``k`` documents most similar to the query text."""
        return [doc for doc, _ in self.similarity_search_with_score(query, k, **kwargs)]

    async def asimilarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Async wrapper around ``similarity_search``."""
        return self.similarity_search(query, k, **kwargs)

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        **kwargs: Any,
    ) -> List[Document]:
        """Pick up to ``k`` documents by maximal marginal relevance.

        Args:
            embedding: Query vector.
            k: Number of documents to return.
            fetch_k: Number of most-similar candidates fetched before the
                MMR selection is made among them.
            lambda_mult: Weight passed through to the MMR helper.

        Raises:
            ImportError: If numpy is not installed.
        """
        # Check for numpy before doing any search work so the failure is
        # immediate, and chain the original error so its cause stays visible.
        try:
            import numpy as np
        except ImportError as e:
            raise ImportError(
                "numpy must be installed to use max_marginal_relevance_search. "
                "Please install numpy with `pip install numpy`."
            ) from e

        prefetch_hits = self._similarity_search_with_score_by_vector(
            embedding=embedding,
            k=fetch_k,
            **kwargs,
        )
        mmr_chosen_indices = maximal_marginal_relevance(
            np.array(embedding, dtype=np.float32),
            [vector for _, _, vector in prefetch_hits],
            k=k,
            lambda_mult=lambda_mult,
        )
        return [prefetch_hits[idx][0] for idx in mmr_chosen_indices]

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        **kwargs: Any,
    ) -> List[Document]:
        """Embed ``query`` and run the vector-based MMR search."""
        embedding_vector = self.embedding.embed_query(query)
        return self.max_marginal_relevance_search_by_vector(
            embedding_vector,
            k,
            fetch_k,
            lambda_mult=lambda_mult,
            **kwargs,
        )

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> "InMemoryVectorStore":
        """Create a store and populate it with the given texts."""
        store = cls(
            embedding=embedding,
        )
        store.add_texts(texts=texts, metadatas=metadatas, **kwargs)
        return store

    @classmethod
    async def afrom_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> "InMemoryVectorStore":
        """Async wrapper around ``from_texts``."""
        return cls.from_texts(texts, embedding, metadatas, **kwargs)

    @classmethod
    def load(
        cls, path: str, embedding: Embeddings, **kwargs: Any
    ) -> "InMemoryVectorStore":
        """Rebuild a store from a JSON file previously written by ``dump``.

        Args:
            path: Location of the JSON file.
            embedding: Embedding function for the restored store.
            **kwargs: Forwarded to the class constructor.
        """
        _path: Path = Path(path)
        with _path.open("r") as f:
            # ``load`` here is the module-level deserializer, not this classmethod.
            store = load(json.load(f))
        vectorstore = cls(embedding=embedding, **kwargs)
        vectorstore.store = store
        return vectorstore

    def dump(self, path: str) -> None:
        """Serialize the store to a JSON file, creating parent directories."""
        _path: Path = Path(path)
        _path.parent.mkdir(exist_ok=True, parents=True)
        with _path.open("w") as f:
            json.dump(dumpd(self.store), f, indent=2)

View File

@ -0,0 +1,100 @@
"""Internal utilities for the in memory implementation of VectorStore.
These are part of a private API and users should not use them directly
as they can change without notice.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, List, Union
if TYPE_CHECKING:
import numpy as np
Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]
logger = logging.getLogger(__name__)
def _cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
"""Row-wise cosine similarity between two equal-width matrices."""
try:
import numpy as np
except ImportError:
raise ImportError(
"cosine_similarity requires numpy to be installed. "
"Please install numpy with `pip install numpy`."
)
if len(X) == 0 or len(Y) == 0:
return np.array([])
X = np.array(X)
Y = np.array(Y)
if X.shape[1] != Y.shape[1]:
raise ValueError(
f"Number of columns in X and Y must be the same. X has shape {X.shape} "
f"and Y has shape {Y.shape}."
)
try:
import simsimd as simd # type: ignore
X = np.array(X, dtype=np.float32)
Y = np.array(Y, dtype=np.float32)
Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
return Z
except ImportError:
logger.debug(
"Unable to import simsimd, defaulting to NumPy implementation. If you want "
"to use simsimd please install with `pip install simsimd`."
)
X_norm = np.linalg.norm(X, axis=1)
Y_norm = np.linalg.norm(Y, axis=1)
# Ignore divide by zero errors run time warnings as those are handled below.
with np.errstate(divide="ignore", invalid="ignore"):
similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm)
similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0
return similarity
def _maximal_marginal_relevance(
    query_embedding: np.ndarray,
    embedding_list: list,
    lambda_mult: float = 0.5,
    k: int = 4,
) -> List[int]:
    """Select indices from ``embedding_list`` by maximal marginal relevance.

    The entry most similar to the query is chosen first. Each further pick
    maximizes ``lambda_mult * similarity_to_query - (1 - lambda_mult) *
    max_similarity_to_already_selected``.

    Args:
        query_embedding: Query vector; a 1-D array is promoted to one row.
        embedding_list: Candidate vectors.
        lambda_mult: Weight of query similarity versus redundancy.
        k: Maximum number of indices to return.

    Returns:
        Selected indices into ``embedding_list`` in selection order; empty
        when ``k`` or the candidate list is empty.

    Raises:
        ImportError: If numpy is not installed.
    """
    try:
        import numpy as np
    except ImportError as e:
        raise ImportError(
            "maximal_marginal_relevance requires numpy to be installed. "
            "Please install numpy with `pip install numpy`."
        ) from e

    if min(k, len(embedding_list)) <= 0:
        return []
    if query_embedding.ndim == 1:
        query_embedding = np.expand_dims(query_embedding, axis=0)
    similarity_to_query = _cosine_similarity(query_embedding, embedding_list)[0]
    most_similar = int(np.argmax(similarity_to_query))
    idxs = [most_similar]
    selected = np.array([embedding_list[most_similar]])
    while len(idxs) < min(k, len(embedding_list)):
        best_score = -np.inf
        idx_to_add = -1
        similarity_to_selected = _cosine_similarity(embedding_list, selected)
        for i, query_score in enumerate(similarity_to_query):
            if i in idxs:
                continue
            redundant_score = max(similarity_to_selected[i])
            equation_score = (
                lambda_mult * query_score - (1 - lambda_mult) * redundant_score
            )
            if equation_score > best_score:
                best_score = equation_score
                idx_to_add = i
        if idx_to_add < 0:
            # No remaining candidate scored above -inf (e.g. NaN scores).
            # Stop rather than record the -1 sentinel as a real index.
            break
        idxs.append(idx_to_add)
        selected = np.append(selected, [embedding_list[idx_to_add]], axis=0)
    return idxs

23
libs/core/poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
[[package]]
name = "annotated-types"
@ -1197,6 +1197,24 @@ files = [
{file = "jupyterlab_widgets-3.0.11.tar.gz", hash = "sha256:dd5ac679593c969af29c9bed054c24f26842baa51352114736756bc035deee27"},
]
[[package]]
name = "langchain-standard-tests"
version = "0.1.1"
description = "Standard tests for LangChain implementations"
optional = false
python-versions = ">=3.8.1,<4.0"
files = []
develop = true
[package.dependencies]
httpx = "^0.27.0"
langchain-core = ">=0.1.40,<0.3"
pytest = ">=7,<9"
[package.source]
type = "directory"
url = "../standard-tests"
[[package]]
name = "langchain-text-splitters"
version = "0.2.2"
@ -2185,7 +2203,6 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@ -3004,4 +3021,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "0a40678314005533ead4fefdbfd8bd27b043641ba99c2211409d8039703ed516"
content-hash = "8db47de0615d9a5324dc0e28f6110908e9b16ccfee699aeafef21f68c879e62a"

View File

@ -85,6 +85,12 @@ pytest-asyncio = "^0.21.1"
grandalf = "^0.8"
pytest-profiling = "^1.7.0"
responses = "^0.25.0"
[tool.poetry.group.test.dependencies.langchain-standard-tests]
path = "../standard-tests"
develop = true
[[tool.poetry.group.test.dependencies.numpy]]
version = "^1.24.0"
python = "<3.12"

View File

@ -0,0 +1,97 @@
from pathlib import Path
import pytest
from langchain_standard_tests.integration_tests.vectorstores import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
from langchain_core.documents import Document
from langchain_core.embeddings.fake import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore
from tests.unit_tests.stubs import AnyStr
class TestInMemoryReadWriteTestSuite(ReadWriteTestSuite):
    """Run the standard synchronous read/write suite on InMemoryVectorStore."""

    @pytest.fixture
    def vectorstore(self) -> InMemoryVectorStore:
        """Build a new store using the suite-provided embeddings."""
        embeddings = self.get_embeddings()
        return InMemoryVectorStore(embedding=embeddings)
class TestAsyncInMemoryReadWriteTestSuite(AsyncReadWriteTestSuite):
    """Run the standard asynchronous read/write suite on InMemoryVectorStore."""

    @pytest.fixture
    async def vectorstore(self) -> InMemoryVectorStore:
        """Build a new store using the suite-provided embeddings."""
        embeddings = self.get_embeddings()
        return InMemoryVectorStore(embedding=embeddings)
async def test_inmemory() -> None:
    """Build a store from texts and check search results and score ordering."""
    embeddings = DeterministicFakeEmbedding(size=6)
    store = await InMemoryVectorStore.afrom_texts(["foo", "bar", "baz"], embeddings)

    top_hit = await store.asimilarity_search("foo", k=1)
    assert top_hit == [Document(page_content="foo", id=AnyStr())]

    top_two = await store.asimilarity_search("bar", k=2)
    expected_top_two = [
        Document(page_content="bar", id=AnyStr()),
        Document(page_content="baz", id=AnyStr()),
    ]
    assert top_two == expected_top_two

    # The first hit must score strictly higher than the second one.
    scored = await store.asimilarity_search_with_score("bar", k=2)
    (_, best_score), (_, runner_up_score) = scored[0], scored[1]
    assert best_score > runner_up_score
async def test_add_by_ids() -> None:
    """Check that caller-supplied ids are used by sync and async add_texts."""
    texts = ["foo", "bar", "baz"]
    vectorstore = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=6))

    # Sync version.
    sync_ids = vectorstore.add_texts(texts, ids=["1", "2", "3"])
    assert sync_ids == ["1", "2", "3"]
    assert sorted(vectorstore.store.keys()) == ["1", "2", "3"]

    # Async version adds to the same store.
    async_ids = await vectorstore.aadd_texts(texts, ids=["4", "5", "6"])
    assert async_ids == ["4", "5", "6"]
    assert sorted(vectorstore.store.keys()) == ["1", "2", "3", "4", "5", "6"]
async def test_inmemory_mmr() -> None:
    """Check MMR search when ``k`` exceeds the number of stored texts."""
    texts = ["foo", "foo", "fou", "foy"]
    embeddings = DeterministicFakeEmbedding(size=6)
    docsearch = await InMemoryVectorStore.afrom_texts(texts, embeddings)

    # make sure we can k > docstore size
    output = await docsearch.amax_marginal_relevance_search(
        "foo", k=10, lambda_mult=0.1
    )

    assert len(output) == len(texts)
    first, second = output[0], output[1]
    assert first == Document(page_content="foo", id=AnyStr())
    assert second == Document(page_content="foy", id=AnyStr())
async def test_inmemory_dump_load(tmp_path: Path) -> None:
    """Check that a dumped and reloaded store returns the same search result."""
    embedding = DeterministicFakeEmbedding(size=6)
    store = await InMemoryVectorStore.afrom_texts(["foo", "bar", "baz"], embedding)
    expected = await store.asimilarity_search("foo", k=1)

    # Round-trip the store through a JSON file on disk.
    dump_location = str(tmp_path / "test.json")
    store.dump(dump_location)
    restored = InMemoryVectorStore.load(dump_location, embedding)

    actual = await restored.asimilarity_search("foo", k=1)
    assert expected == actual
async def test_inmemory_filter() -> None:
    """Check that a metadata filter restricts similarity search results."""

    def has_id_one(doc: Document) -> bool:
        return doc.metadata["id"] == 1

    store = await InMemoryVectorStore.afrom_texts(
        ["foo", "bar"],
        DeterministicFakeEmbedding(size=6),
        [{"id": 1}, {"id": 2}],
    )

    output = await store.asimilarity_search("baz", filter=has_id_one)

    expected = [Document(page_content="foo", metadata={"id": 1}, id=AnyStr())]
    assert output == expected

View File

@ -1,18 +1,14 @@
import pytest
from langchain_core.vectorstores import VectorStore
from langchain_core.vectorstores import (
InMemoryVectorStore,
VectorStore,
)
from langchain_standard_tests.integration_tests.vectorstores import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
# We'll need to move this dependency to core
pytest.importorskip("langchain_community")
from langchain_community.vectorstores.inmemory import ( # type: ignore # noqa
InMemoryVectorStore,
)
class TestInMemoryVectorStore(ReadWriteTestSuite):
@pytest.fixture