Add workaround for not having async vector store methods (#2733)

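This allows us to use the async API for the Retrieval chains. The default async methods simply run the synchronous implementation in the event loop's default executor, so they are not guaranteed to be thread safe.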
2026-02-21 06:33:41 +00:00 · 2023-04-11 18:49:08 -07:00
parent 0806951c07
commit c1521ddbdb
2 changed files with 33 additions and 3 deletions
--- a/langchain/vectorstores/base.py
+++ b/langchain/vectorstores/base.py
@@ -1,7 +1,9 @@
 """Interface for vector stores."""
 from __future__ import annotations

+import asyncio
 from abc import ABC, abstractmethod
+from functools import partial
 from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar

 from pydantic import BaseModel, Field, root_validator
@@ -83,7 +85,12 @@ class VectorStore(ABC):
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return docs most similar to query."""
-        raise NotImplementedError
+
+        # This is a temporary workaround to make the similarity search
+        # asynchronous. The proper solution is to make the similarity search
+        # asynchronous in the vector store implementations.
+        func = partial(self.similarity_search, query, k, **kwargs)
+        return await asyncio.get_event_loop().run_in_executor(None, func)

    def similarity_search_by_vector(
        self, embedding: List[float], k: int = 4, **kwargs: Any
@@ -103,7 +110,12 @@ class VectorStore(ABC):
        self, embedding: List[float], k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return docs most similar to embedding vector."""
-        raise NotImplementedError
+
+        # This is a temporary workaround to make the similarity search
+        # asynchronous. The proper solution is to make the similarity search
+        # asynchronous in the vector store implementations.
+        func = partial(self.similarity_search_by_vector, embedding, k, **kwargs)
+        return await asyncio.get_event_loop().run_in_executor(None, func)

    def max_marginal_relevance_search(
        self, query: str, k: int = 4, fetch_k: int = 20
@@ -127,7 +139,12 @@ class VectorStore(ABC):
        self, query: str, k: int = 4, fetch_k: int = 20
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance."""
-        raise NotImplementedError
+
+        # This is a temporary workaround to make the maximal marginal relevance
+        # search asynchronous. The proper solution is to make that search
+        # asynchronous in the vector store implementations.
+        func = partial(self.max_marginal_relevance_search, query, k, fetch_k)
+        return await asyncio.get_event_loop().run_in_executor(None, func)

    def max_marginal_relevance_search_by_vector(
        self, embedding: List[float], k: int = 4, fetch_k: int = 20
--- a/tests/integration_tests/vectorstores/test_chroma.py
+++ b/tests/integration_tests/vectorstores/test_chroma.py
@@ -1,4 +1,6 @@
 """Test Chroma functionality."""
+import pytest
+
 from langchain.docstore.document import Document
 from langchain.vectorstores import Chroma
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
@@ -14,6 +16,17 @@ def test_chroma() -> None:
    assert output == [Document(page_content="foo")]


+@pytest.mark.asyncio
+async def test_chroma_async() -> None:
+    """Test end to end construction and async search."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = Chroma.from_texts(
+        collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
+    )
+    output = await docsearch.asimilarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+
 def test_chroma_with_metadatas() -> None:
    """Test end to end construction and search."""
    texts = ["foo", "bar", "baz"]