mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-13 22:59:05 +00:00
core: Add async methods to BaseExampleSelector and SemanticSimilarityExampleSelector (#19399)
A few-shot prompt template may use a `SemanticSimilarityExampleSelector`, which in turn uses a `VectorStore` that performs I/O operations. To work correctly on the event loop, we therefore need: * async methods for the `VectorStore` (OK) * async methods for the `SemanticSimilarityExampleSelector` (this PR) * async methods for `BasePromptTemplate` and `BaseChatPromptTemplate` (future work)
This commit is contained in:
parent
29c58528c7
commit
a9457d269e
@ -2,6 +2,8 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
from langchain_core.runnables import run_in_executor
|
||||||
|
|
||||||
|
|
||||||
class BaseExampleSelector(ABC):
|
class BaseExampleSelector(ABC):
|
||||||
"""Interface for selecting examples to include in prompts."""
|
"""Interface for selecting examples to include in prompts."""
|
||||||
@ -10,6 +12,14 @@ class BaseExampleSelector(ABC):
|
|||||||
def add_example(self, example: Dict[str, str]) -> Any:
|
def add_example(self, example: Dict[str, str]) -> Any:
|
||||||
"""Add new example to store."""
|
"""Add new example to store."""
|
||||||
|
|
||||||
|
async def aadd_example(self, example: Dict[str, str]) -> Any:
    """Asynchronously add a new example to the store.

    Forwards ``example`` to the synchronous ``add_example`` by way of
    ``run_in_executor`` (invoked with ``None`` as its first argument) and
    returns whatever ``add_example`` returns.
    """
    outcome = await run_in_executor(None, self.add_example, example)
    return outcome
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
|
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
|
||||||
"""Select which examples to use based on the inputs."""
|
"""Select which examples to use based on the inputs."""
|
||||||
|
|
||||||
|
async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]:
    """Asynchronously select which examples to use based on the inputs.

    Forwards ``input_variables`` to the synchronous ``select_examples`` by
    way of ``run_in_executor`` (invoked with ``None`` as its first argument)
    and returns the resulting list of examples.
    """
    chosen = await run_in_executor(None, self.select_examples, input_variables)
    return chosen
|
||||||
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
|
||||||
|
|
||||||
|
from langchain_core.documents import Document
|
||||||
from langchain_core.example_selectors.base import BaseExampleSelector
|
from langchain_core.example_selectors.base import BaseExampleSelector
|
||||||
from langchain_core.pydantic_v1 import BaseModel, Extra
|
from langchain_core.pydantic_v1 import BaseModel, Extra
|
||||||
from langchain_core.vectorstores import VectorStore
|
from langchain_core.vectorstores import VectorStore
|
||||||
@ -37,34 +38,59 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
|
|||||||
extra = Extra.forbid
|
extra = Extra.forbid
|
||||||
arbitrary_types_allowed = True
|
arbitrary_types_allowed = True
|
||||||
|
|
||||||
|
@staticmethod
def _example_to_text(
    example: Dict[str, str], input_keys: Optional[List[str]]
) -> str:
    """Render an example as the single string handed to the vector store.

    Args:
        example: The example to render.
        input_keys: If non-empty, only the values stored under these keys
            are used; otherwise every value in ``example`` is used.

    Returns:
        The values produced by ``sorted_values`` for the chosen entries,
        joined with single spaces.

    Raises:
        KeyError: If a key listed in ``input_keys`` is missing from
            ``example``.
    """
    # Narrow the example first so both cases share one join below.
    chosen = {key: example[key] for key in input_keys} if input_keys else example
    return " ".join(sorted_values(chosen))
|
||||||
|
|
||||||
|
def _documents_to_examples(self, documents: List[Document]) -> List[dict]:
|
||||||
|
# Get the examples from the metadata.
|
||||||
|
# This assumes that examples are stored in metadata.
|
||||||
|
examples = [dict(e.metadata) for e in documents]
|
||||||
|
# If example keys are provided, filter examples to those keys.
|
||||||
|
if self.example_keys:
|
||||||
|
examples = [{k: eg[k] for k in self.example_keys} for eg in examples]
|
||||||
|
return examples
|
||||||
|
|
||||||
def add_example(self, example: Dict[str, str]) -> str:
|
def add_example(self, example: Dict[str, str]) -> str:
|
||||||
"""Add new example to vectorstore."""
|
"""Add new example to vectorstore."""
|
||||||
if self.input_keys:
|
ids = self.vectorstore.add_texts(
|
||||||
string_example = " ".join(
|
[self._example_to_text(example, self.input_keys)], metadatas=[example]
|
||||||
sorted_values({key: example[key] for key in self.input_keys})
|
)
|
||||||
)
|
return ids[0]
|
||||||
else:
|
|
||||||
string_example = " ".join(sorted_values(example))
|
async def aadd_example(self, example: Dict[str, str]) -> str:
|
||||||
ids = self.vectorstore.add_texts([string_example], metadatas=[example])
|
"""Add new example to vectorstore."""
|
||||||
|
ids = await self.vectorstore.aadd_texts(
|
||||||
|
[self._example_to_text(example, self.input_keys)], metadatas=[example]
|
||||||
|
)
|
||||||
return ids[0]
|
return ids[0]
|
||||||
|
|
||||||
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
|
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
|
||||||
"""Select which examples to use based on semantic similarity."""
|
"""Select which examples to use based on semantic similarity."""
|
||||||
# Get the docs with the highest similarity.
|
# Get the docs with the highest similarity.
|
||||||
if self.input_keys:
|
|
||||||
input_variables = {key: input_variables[key] for key in self.input_keys}
|
|
||||||
vectorstore_kwargs = self.vectorstore_kwargs or {}
|
vectorstore_kwargs = self.vectorstore_kwargs or {}
|
||||||
query = " ".join(sorted_values(input_variables))
|
|
||||||
example_docs = self.vectorstore.similarity_search(
|
example_docs = self.vectorstore.similarity_search(
|
||||||
query, k=self.k, **vectorstore_kwargs
|
self._example_to_text(input_variables, self.input_keys),
|
||||||
|
k=self.k,
|
||||||
|
**vectorstore_kwargs,
|
||||||
)
|
)
|
||||||
# Get the examples from the metadata.
|
return self._documents_to_examples(example_docs)
|
||||||
# This assumes that examples are stored in metadata.
|
|
||||||
examples = [dict(e.metadata) for e in example_docs]
|
async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]:
|
||||||
# If example keys are provided, filter examples to those keys.
|
"""Select which examples to use based on semantic similarity."""
|
||||||
if self.example_keys:
|
# Get the docs with the highest similarity.
|
||||||
examples = [{k: eg[k] for k in self.example_keys} for eg in examples]
|
vectorstore_kwargs = self.vectorstore_kwargs or {}
|
||||||
return examples
|
example_docs = await self.vectorstore.asimilarity_search(
|
||||||
|
self._example_to_text(input_variables, self.input_keys),
|
||||||
|
k=self.k,
|
||||||
|
**vectorstore_kwargs,
|
||||||
|
)
|
||||||
|
return self._documents_to_examples(example_docs)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_examples(
|
def from_examples(
|
||||||
@ -95,13 +121,7 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
|
|||||||
Returns:
|
Returns:
|
||||||
The ExampleSelector instantiated, backed by a vector store.
|
The ExampleSelector instantiated, backed by a vector store.
|
||||||
"""
|
"""
|
||||||
if input_keys:
|
string_examples = [cls._example_to_text(eg, input_keys) for eg in examples]
|
||||||
string_examples = [
|
|
||||||
" ".join(sorted_values({k: eg[k] for k in input_keys}))
|
|
||||||
for eg in examples
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
string_examples = [" ".join(sorted_values(eg)) for eg in examples]
|
|
||||||
vectorstore = vectorstore_cls.from_texts(
|
vectorstore = vectorstore_cls.from_texts(
|
||||||
string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs
|
string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs
|
||||||
)
|
)
|
||||||
@ -113,6 +133,47 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
|
|||||||
vectorstore_kwargs=vectorstore_kwargs,
|
vectorstore_kwargs=vectorstore_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@classmethod
async def afrom_examples(
    cls,
    examples: List[dict],
    embeddings: Embeddings,
    vectorstore_cls: Type[VectorStore],
    k: int = 4,
    input_keys: Optional[List[str]] = None,
    *,
    example_keys: Optional[List[str]] = None,
    vectorstore_kwargs: Optional[dict] = None,
    **vectorstore_cls_kwargs: Any,
) -> SemanticSimilarityExampleSelector:
    """Asynchronously create a k-shot example selector from examples.

    Reshuffles examples dynamically based on query similarity.

    Args:
        examples: List of examples to use in the prompt.
        embeddings: An initialized embedding API interface,
            e.g. OpenAIEmbeddings().
        vectorstore_cls: A vector store DB interface class, e.g. FAISS.
        k: Number of examples to select.
        input_keys: If provided, the search is based on the input variables
            instead of all variables.
        example_keys: Stored on the selector as ``example_keys``.
        vectorstore_kwargs: Stored on the selector as ``vectorstore_kwargs``.
        vectorstore_cls_kwargs: Optional kwargs forwarded to
            ``vectorstore_cls.afrom_texts``, e.g. the url of the vector store.

    Returns:
        The ExampleSelector instantiated, backed by a vector store.
    """
    # One text per example; the examples themselves are stored as metadata.
    texts = [cls._example_to_text(item, input_keys) for item in examples]
    store = await vectorstore_cls.afrom_texts(
        texts, embeddings, metadatas=examples, **vectorstore_cls_kwargs
    )
    selector_fields = {
        "vectorstore": store,
        "k": k,
        "input_keys": input_keys,
        "example_keys": example_keys,
        "vectorstore_kwargs": vectorstore_kwargs,
    }
    return cls(**selector_fields)
|
||||||
|
|
||||||
|
|
||||||
class MaxMarginalRelevanceExampleSelector(SemanticSimilarityExampleSelector):
|
class MaxMarginalRelevanceExampleSelector(SemanticSimilarityExampleSelector):
|
||||||
"""ExampleSelector that selects examples based on Max Marginal Relevance.
|
"""ExampleSelector that selects examples based on Max Marginal Relevance.
|
||||||
|
26
libs/core/tests/unit_tests/example_selectors/test_base.py
Normal file
26
libs/core/tests/unit_tests/example_selectors/test_base.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from langchain_core.example_selectors import BaseExampleSelector
|
||||||
|
|
||||||
|
|
||||||
|
class DummyExampleSelector(BaseExampleSelector):
    """Minimal selector used to exercise the base-class async wrappers.

    It keeps only the most recently added example and echoes the input
    variables back when asked to select.
    """

    def __init__(self) -> None:
        # Last example passed to ``add_example``; ``None`` until then.
        self.example: Optional[Dict[str, str]] = None

    def add_example(self, example: Dict[str, str]) -> None:
        """Remember ``example`` as the stored example."""
        self.example = example

    def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
        """Return a one-element list holding ``input_variables`` itself."""
        echoed: List[dict] = [input_variables]
        return echoed
|
||||||
|
|
||||||
|
|
||||||
|
async def test_aadd_example() -> None:
    """The default ``aadd_example`` must forward to ``add_example``."""
    dummy = DummyExampleSelector()
    await dummy.aadd_example({"foo": "bar"})
    # The dummy records whatever ``add_example`` received.
    stored = dummy.example
    assert stored == {"foo": "bar"}
|
||||||
|
|
||||||
|
|
||||||
|
async def test_aselect_examples() -> None:
    """The default ``aselect_examples`` must forward to ``select_examples``."""
    dummy = DummyExampleSelector()
    # The dummy echoes its input back as the single selected example.
    picked = await dummy.aselect_examples({"foo": "bar"})
    assert picked == [{"foo": "bar"}]
|
139
libs/core/tests/unit_tests/example_selectors/test_similarity.py
Normal file
139
libs/core/tests/unit_tests/example_selectors/test_similarity.py
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
from typing import Any, Iterable, List, Optional, cast
|
||||||
|
|
||||||
|
from langchain_core.documents import Document
|
||||||
|
from langchain_core.embeddings import Embeddings, FakeEmbeddings
|
||||||
|
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
|
||||||
|
from langchain_core.vectorstores import VectorStore
|
||||||
|
|
||||||
|
|
||||||
|
class DummyVectorStore(VectorStore):
    """In-memory ``VectorStore`` stub that records what it is given.

    Nothing is embedded or indexed: added texts and metadata are appended to
    plain lists so tests can inspect them, and searches return canned
    documents derived from the query.
    """

    def __init__(self, init_arg: Optional[str] = None):
        # Everything passed to ``add_texts`` so far, in call order.
        self.texts: List[str] = []
        self.metadatas: List[dict] = []
        # Only set by ``from_texts``.
        self._embeddings: Optional[Embeddings] = None
        # Lets tests check that constructor kwargs are forwarded.
        self.init_arg = init_arg

    @property
    def embeddings(self) -> Optional[Embeddings]:
        """Return the embeddings object given to ``from_texts``, if any."""
        return self._embeddings

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Record ``texts`` (and ``metadatas``, when non-empty).

        Returns:
            Always ``["dummy_id"]``, regardless of how many texts were added.
        """
        self.texts.extend(texts)
        if metadatas:
            self.metadatas.extend(metadatas)
        return ["dummy_id"]

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Return ``k`` references to one document built from ``query``.

        The document's content is the query; its metadata holds the query
        under ``"metadata"`` plus a fixed ``"other"`` entry.
        """
        return [
            Document(page_content=query, metadata={"metadata": query, "other": "other"})
        ] * k

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> "DummyVectorStore":
        """Build a store from ``texts``, remembering ``embedding``.

        ``kwargs`` are forwarded to the constructor.
        """
        # Instantiate through ``cls`` so a subclass gets its own type back.
        store = cls(**kwargs)
        store.add_texts(texts, metadatas)
        store._embeddings = embedding
        return store
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_example() -> None:
    """Only ``input_keys`` values become text; the full example is metadata."""
    store = DummyVectorStore()
    SemanticSimilarityExampleSelector(
        vectorstore=store, input_keys=["foo", "foo3"]
    ).add_example({"foo": "bar", "foo2": "bar2", "foo3": "bar3"})
    # "foo2" is not an input key, so "bar2" must not appear in the text.
    assert store.texts == ["bar bar3"]
    assert store.metadatas == [{"foo": "bar", "foo2": "bar2", "foo3": "bar3"}]
|
||||||
|
|
||||||
|
|
||||||
|
async def test_aadd_example() -> None:
    """Async variant: only ``input_keys`` values become text."""
    store = DummyVectorStore()
    chooser = SemanticSimilarityExampleSelector(
        vectorstore=store, input_keys=["foo", "foo3"]
    )
    await chooser.aadd_example({"foo": "bar", "foo2": "bar2", "foo3": "bar3"})
    # "foo2" is not an input key, so "bar2" must not appear in the text.
    assert store.texts == ["bar bar3"]
    assert store.metadatas == [{"foo": "bar", "foo2": "bar2", "foo3": "bar3"}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_select_examples() -> None:
    """The query uses ``input_keys``; results are cut down to ``example_keys``."""
    chooser = SemanticSimilarityExampleSelector(
        vectorstore=DummyVectorStore(),
        input_keys=["foo2"],
        example_keys=["metadata"],
        k=2,
    )
    picked = chooser.select_examples({"foo": "bar", "foo2": "bar2"})
    # The dummy store echoes the query into metadata["metadata"], k times.
    assert picked == [{"metadata": "bar2"}, {"metadata": "bar2"}]
|
||||||
|
|
||||||
|
|
||||||
|
async def test_aselect_examples() -> None:
    """Async variant: query from ``input_keys``, results cut to ``example_keys``."""
    chooser = SemanticSimilarityExampleSelector(
        vectorstore=DummyVectorStore(),
        input_keys=["foo2"],
        example_keys=["metadata"],
        k=2,
    )
    picked = await chooser.aselect_examples({"foo": "bar", "foo2": "bar2"})
    # The dummy store echoes the query into metadata["metadata"], k times.
    assert picked == [{"metadata": "bar2"}, {"metadata": "bar2"}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_from_examples() -> None:
    """``from_examples`` must build the store and keep every setting."""
    fake_embeddings = FakeEmbeddings(size=1)
    built = SemanticSimilarityExampleSelector.from_examples(
        examples=[{"foo": "bar"}],
        embeddings=fake_embeddings,
        vectorstore_cls=DummyVectorStore,
        k=2,
        input_keys=["foo"],
        example_keys=["some_example_key"],
        vectorstore_kwargs={"vs_foo": "vs_bar"},
        init_arg="some_init_arg",
    )

    # Selector-level settings are stored as given.
    assert built.input_keys == ["foo"]
    assert built.example_keys == ["some_example_key"]
    assert built.k == 2
    assert built.vectorstore_kwargs == {"vs_foo": "vs_bar"}

    # The store is of the requested class and received the extra kwargs,
    # the embeddings object, and the examples as text plus metadata.
    assert isinstance(built.vectorstore, DummyVectorStore)
    store = cast(DummyVectorStore, built.vectorstore)
    assert store.embeddings is fake_embeddings
    assert store.init_arg == "some_init_arg"
    assert store.texts == ["bar"]
    assert store.metadatas == [{"foo": "bar"}]
|
||||||
|
|
||||||
|
|
||||||
|
async def test_afrom_examples() -> None:
    """``afrom_examples`` must build the store and keep every setting."""
    fake_embeddings = FakeEmbeddings(size=1)
    built = await SemanticSimilarityExampleSelector.afrom_examples(
        examples=[{"foo": "bar"}],
        embeddings=fake_embeddings,
        vectorstore_cls=DummyVectorStore,
        k=2,
        input_keys=["foo"],
        example_keys=["some_example_key"],
        vectorstore_kwargs={"vs_foo": "vs_bar"},
        init_arg="some_init_arg",
    )

    # Selector-level settings are stored as given.
    assert built.input_keys == ["foo"]
    assert built.example_keys == ["some_example_key"]
    assert built.k == 2
    assert built.vectorstore_kwargs == {"vs_foo": "vs_bar"}

    # The store is of the requested class and received the extra kwargs,
    # the embeddings object, and the examples as text plus metadata.
    assert isinstance(built.vectorstore, DummyVectorStore)
    store = cast(DummyVectorStore, built.vectorstore)
    assert store.embeddings is fake_embeddings
    assert store.init_arg == "some_init_arg"
    assert store.texts == ["bar"]
    assert store.metadatas == [{"foo": "bar"}]
|
Loading…
Reference in New Issue
Block a user