core: docstrings example_selectors (#23542)

Added missing docstrings. Reformatted existing docstrings into a consistent form.
This commit is contained in:
Leonid Ganeline 2024-06-26 14:10:40 -07:00 committed by GitHub
parent 3bf1d98dbf
commit 1141b08eb8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 108 additions and 18 deletions

View File

@ -2,6 +2,7 @@
in prompts. in prompts.
This allows us to select examples that are most relevant to the input. This allows us to select examples that are most relevant to the input.
""" """
from langchain_core.example_selectors.base import BaseExampleSelector from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.example_selectors.length_based import ( from langchain_core.example_selectors.length_based import (
LengthBasedExampleSelector, LengthBasedExampleSelector,

View File

@ -1,4 +1,5 @@
"""Interface for selecting examples to include in prompts.""" """Interface for selecting examples to include in prompts."""
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Any, Dict, List from typing import Any, Dict, List
@ -10,16 +11,34 @@ class BaseExampleSelector(ABC):
@abstractmethod @abstractmethod
def add_example(self, example: Dict[str, str]) -> Any: def add_example(self, example: Dict[str, str]) -> Any:
"""Add new example to store.""" """Add new example to store.
Args:
example: A dictionary with keys as input variables
and values as their values."""
async def aadd_example(self, example: Dict[str, str]) -> Any: async def aadd_example(self, example: Dict[str, str]) -> Any:
"""Add new example to store.""" """Async add new example to store.
Args:
example: A dictionary with keys as input variables
and values as their values."""
return await run_in_executor(None, self.add_example, example) return await run_in_executor(None, self.add_example, example)
@abstractmethod @abstractmethod
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on the inputs.""" """Select which examples to use based on the inputs.
Args:
input_variables: A dictionary with keys as input variables
and values as their values."""
async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]: async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on the inputs.""" """Async select which examples to use based on the inputs.
Args:
input_variables: A dictionary with keys as input variables
and values as their values."""
return await run_in_executor(None, self.select_examples, input_variables) return await run_in_executor(None, self.select_examples, input_variables)

View File

@ -1,4 +1,5 @@
"""Select examples based on length.""" """Select examples based on length."""
import re import re
from typing import Callable, Dict, List from typing import Callable, Dict, List
@ -27,15 +28,27 @@ class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
"""Max length for the prompt, beyond which examples are cut.""" """Max length for the prompt, beyond which examples are cut."""
example_text_lengths: List[int] = [] #: :meta private: example_text_lengths: List[int] = [] #: :meta private:
"""Length of each example."""
def add_example(self, example: Dict[str, str]) -> None: def add_example(self, example: Dict[str, str]) -> None:
"""Add new example to list.""" """Add new example to list.
Args:
example: A dictionary with keys as input variables
and values as their values.
"""
self.examples.append(example) self.examples.append(example)
string_example = self.example_prompt.format(**example) string_example = self.example_prompt.format(**example)
self.example_text_lengths.append(self.get_text_length(string_example)) self.example_text_lengths.append(self.get_text_length(string_example))
async def aadd_example(self, example: Dict[str, str]) -> None: async def aadd_example(self, example: Dict[str, str]) -> None:
"""Add new example to list.""" """Async add new example to list.
Args:
example: A dictionary with keys as input variables
and values as their values.
"""
self.add_example(example) self.add_example(example)
@validator("example_text_lengths", always=True) @validator("example_text_lengths", always=True)
@ -51,7 +64,15 @@ class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
return [get_text_length(eg) for eg in string_examples] return [get_text_length(eg) for eg in string_examples]
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on the input lengths.""" """Select which examples to use based on the input lengths.
Args:
input_variables: A dictionary with keys as input variables
and values as their values.
Returns:
A list of examples to include in the prompt.
"""
inputs = " ".join(input_variables.values()) inputs = " ".join(input_variables.values())
remaining_length = self.max_length - self.get_text_length(inputs) remaining_length = self.max_length - self.get_text_length(inputs)
i = 0 i = 0
@ -67,5 +88,13 @@ class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
return examples return examples
async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]: async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on the input lengths.""" """Async select which examples to use based on the input lengths.
Args:
input_variables: A dictionary with keys as input variables
and values as their values.
Returns:
A list of examples to include in the prompt.
"""
return self.select_examples(input_variables) return self.select_examples(input_variables)

View File

@ -1,4 +1,5 @@
"""Example selector that selects examples based on SemanticSimilarity.""" """Example selector that selects examples based on SemanticSimilarity."""
from __future__ import annotations from __future__ import annotations
from abc import ABC from abc import ABC
@ -14,7 +15,15 @@ if TYPE_CHECKING:
def sorted_values(values: Dict[str, str]) -> List[Any]: def sorted_values(values: Dict[str, str]) -> List[Any]:
"""Return a list of values in dict sorted by key.""" """Return a list of values in dict sorted by key.
Args:
values: A dictionary with keys as input variables
and values as their values.
Returns:
A list of values in dict sorted by key.
"""
return [values[val] for val in sorted(values)] return [values[val] for val in sorted(values)]
@ -58,14 +67,30 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
return examples return examples
def add_example(self, example: Dict[str, str]) -> str: def add_example(self, example: Dict[str, str]) -> str:
"""Add new example to vectorstore.""" """Add a new example to vectorstore.
Args:
example: A dictionary with keys as input variables
and values as their values.
Returns:
The ID of the added example.
"""
ids = self.vectorstore.add_texts( ids = self.vectorstore.add_texts(
[self._example_to_text(example, self.input_keys)], metadatas=[example] [self._example_to_text(example, self.input_keys)], metadatas=[example]
) )
return ids[0] return ids[0]
async def aadd_example(self, example: Dict[str, str]) -> str: async def aadd_example(self, example: Dict[str, str]) -> str:
"""Add new example to vectorstore.""" """Async add new example to vectorstore.
Args:
example: A dictionary with keys as input variables
and values as their values.
Returns:
The ID of the added example.
"""
ids = await self.vectorstore.aadd_texts( ids = await self.vectorstore.aadd_texts(
[self._example_to_text(example, self.input_keys)], metadatas=[example] [self._example_to_text(example, self.input_keys)], metadatas=[example]
) )
@ -76,7 +101,14 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
"""Select examples based on semantic similarity.""" """Select examples based on semantic similarity."""
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select examples based on semantic similarity.""" """Select examples based on semantic similarity.
Args:
input_variables: The input variables to use for search.
Returns:
The selected examples.
"""
# Get the docs with the highest similarity. # Get the docs with the highest similarity.
vectorstore_kwargs = self.vectorstore_kwargs or {} vectorstore_kwargs = self.vectorstore_kwargs or {}
example_docs = self.vectorstore.similarity_search( example_docs = self.vectorstore.similarity_search(
@ -87,7 +119,14 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
return self._documents_to_examples(example_docs) return self._documents_to_examples(example_docs)
async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]: async def aselect_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Asynchronously select examples based on semantic similarity.""" """Asynchronously select examples based on semantic similarity.
Args:
input_variables: The input variables to use for search.
Returns:
The selected examples.
"""
# Get the docs with the highest similarity. # Get the docs with the highest similarity.
vectorstore_kwargs = self.vectorstore_kwargs or {} vectorstore_kwargs = self.vectorstore_kwargs or {}
example_docs = await self.vectorstore.asimilarity_search( example_docs = await self.vectorstore.asimilarity_search(
@ -118,7 +157,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
examples: List of examples to use in the prompt. examples: List of examples to use in the prompt.
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings(). embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS. vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select k: Number of examples to select. Default is 4.
input_keys: If provided, the search is based on the input variables input_keys: If provided, the search is based on the input variables
instead of all variables. instead of all variables.
example_keys: If provided, keys to filter examples to. example_keys: If provided, keys to filter examples to.
@ -154,7 +193,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
vectorstore_kwargs: Optional[dict] = None, vectorstore_kwargs: Optional[dict] = None,
**vectorstore_cls_kwargs: Any, **vectorstore_cls_kwargs: Any,
) -> SemanticSimilarityExampleSelector: ) -> SemanticSimilarityExampleSelector:
"""Create k-shot example selector using example list and embeddings. """Async create k-shot example selector using example list and embeddings.
Reshuffles examples dynamically based on query similarity. Reshuffles examples dynamically based on query similarity.
@ -162,7 +201,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
examples: List of examples to use in the prompt. examples: List of examples to use in the prompt.
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings(). embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS. vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select k: Number of examples to select. Default is 4.
input_keys: If provided, the search is based on the input variables input_keys: If provided, the search is based on the input variables
instead of all variables. instead of all variables.
example_keys: If provided, keys to filter examples to. example_keys: If provided, keys to filter examples to.
@ -249,8 +288,9 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
examples: List of examples to use in the prompt. examples: List of examples to use in the prompt.
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings(). embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS. vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select k: Number of examples to select. Default is 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm. fetch_k: Number of Documents to fetch to pass to MMR algorithm.
Default is 20.
input_keys: If provided, the search is based on the input variables input_keys: If provided, the search is based on the input variables
instead of all variables. instead of all variables.
example_keys: If provided, keys to filter examples to. example_keys: If provided, keys to filter examples to.
@ -297,8 +337,9 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
examples: List of examples to use in the prompt. examples: List of examples to use in the prompt.
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings(). embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS. vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select k: Number of examples to select. Default is 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm. fetch_k: Number of Documents to fetch to pass to MMR algorithm.
Default is 20.
input_keys: If provided, the search is based on the input variables input_keys: If provided, the search is based on the input variables
instead of all variables. instead of all variables.
example_keys: If provided, keys to filter examples to. example_keys: If provided, keys to filter examples to.