mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 07:35:18 +00:00
Add workaround for not having async vector store methods (#2733)
This allows us to use the async API for the Retrieval chains, though it is not guaranteed to be thread safe.
This commit is contained in:
parent
0806951c07
commit
c1521ddbdb
@ -1,7 +1,9 @@
|
||||
"""Interface for vector stores."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from abc import ABC, abstractmethod
|
||||
from functools import partial
|
||||
from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar
|
||||
|
||||
from pydantic import BaseModel, Field, root_validator
|
||||
@ -83,7 +85,12 @@ class VectorStore(ABC):
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to query."""
|
||||
raise NotImplementedError
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(self.similarity_search, query, k, **kwargs)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def similarity_search_by_vector(
|
||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||
@ -103,7 +110,12 @@ class VectorStore(ABC):
|
||||
self, embedding: List[float], k: int = 4, **kwargs: Any
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to embedding vector."""
|
||||
raise NotImplementedError
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(self.similarity_search_by_vector, embedding, k, **kwargs)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def max_marginal_relevance_search(
|
||||
self, query: str, k: int = 4, fetch_k: int = 20
|
||||
@ -127,7 +139,12 @@ class VectorStore(ABC):
|
||||
self, query: str, k: int = 4, fetch_k: int = 20
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance."""
|
||||
raise NotImplementedError
|
||||
|
||||
# This is a temporary workaround to make the similarity search
|
||||
# asynchronous. The proper solution is to make the similarity search
|
||||
# asynchronous in the vector store implementations.
|
||||
func = partial(self.max_marginal_relevance_search, query, k, fetch_k)
|
||||
return await asyncio.get_event_loop().run_in_executor(None, func)
|
||||
|
||||
def max_marginal_relevance_search_by_vector(
|
||||
self, embedding: List[float], k: int = 4, fetch_k: int = 20
|
||||
|
@ -1,4 +1,6 @@
|
||||
"""Test Chroma functionality."""
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.vectorstores import Chroma
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||
@ -14,6 +16,17 @@ def test_chroma() -> None:
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_chroma_async() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
docsearch = Chroma.from_texts(
|
||||
collection_name="test_collection", texts=texts, embedding=FakeEmbeddings()
|
||||
)
|
||||
output = await docsearch.asimilarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
|
||||
def test_chroma_with_metadatas() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
|
Loading…
Reference in New Issue
Block a user