Compare commits

...

2 Commits

Author SHA1 Message Date
Eugene Yurtsev
52a7f48522 x 2024-03-26 11:23:59 -04:00
Eugene Yurtsev
b68867e152 x 2024-03-26 11:20:26 -04:00

View File

@@ -17,8 +17,26 @@ def sorted_values(values: Dict[str, str]) -> List[Any]:
return [values[val] for val in sorted(values)]
def _example_to_text(example: Dict[str, str], input_keys: Optional[List[str]]) -> str:
"""Convert example to text for vectorstore for indexing purposes."""
if input_keys:
return " ".join(sorted_values({key: example[key] for key in input_keys}))
else:
return " ".join(sorted_values(example))
class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
"""Example selector that selects examples based on SemanticSimilarity."""
"""Example selector that selects examples based on SemanticSimilarity.
This selector is able to retrieve examples from a vectorstore based on
semantic similarity of the query to examples stored in the vectorstore.
Selecting examples dynamically based on semantic similarity can be useful
in situations where there is a large number of examples, and we want to
select a subset of examples that are most similar to the input query to
improve the performance of the model while keeping the prompt size
within the limits of the model.
"""
vectorstore: VectorStore
"""VectorStore than contains information about examples."""
@@ -38,16 +56,8 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
extra = Extra.forbid
arbitrary_types_allowed = True
@staticmethod
def _example_to_text(
example: Dict[str, str], input_keys: Optional[List[str]]
) -> str:
if input_keys:
return " ".join(sorted_values({key: example[key] for key in input_keys}))
else:
return " ".join(sorted_values(example))
def _documents_to_examples(self, documents: List[Document]) -> List[dict]:
"""Convert the metadata of the documents into examples."""
# Get the examples from the metadata.
# This assumes that examples are stored in metadata.
examples = [dict(e.metadata) for e in documents]
@@ -59,14 +69,14 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
def add_example(self, example: Dict[str, str]) -> str:
"""Add new example to vectorstore."""
ids = self.vectorstore.add_texts(
[self._example_to_text(example, self.input_keys)], metadatas=[example]
[_example_to_text(example, self.input_keys)], metadatas=[example]
)
return ids[0]
async def aadd_example(self, example: Dict[str, str]) -> str:
"""Add new example to vectorstore."""
ids = await self.vectorstore.aadd_texts(
[self._example_to_text(example, self.input_keys)], metadatas=[example]
[_example_to_text(example, self.input_keys)], metadatas=[example]
)
return ids[0]
@@ -75,7 +85,7 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
# Get the docs with the highest similarity.
vectorstore_kwargs = self.vectorstore_kwargs or {}
example_docs = self.vectorstore.similarity_search(
self._example_to_text(input_variables, self.input_keys),
_example_to_text(input_variables, self.input_keys),
k=self.k,
**vectorstore_kwargs,
)
@@ -86,7 +96,7 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
# Get the docs with the highest similarity.
vectorstore_kwargs = self.vectorstore_kwargs or {}
example_docs = await self.vectorstore.asimilarity_search(
self._example_to_text(input_variables, self.input_keys),
_example_to_text(input_variables, self.input_keys),
k=self.k,
**vectorstore_kwargs,
)
@@ -121,7 +131,7 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
Returns:
The ExampleSelector instantiated, backed by a vector store.
"""
string_examples = [cls._example_to_text(eg, input_keys) for eg in examples]
string_examples = [_example_to_text(eg, input_keys) for eg in examples]
vectorstore = vectorstore_cls.from_texts(
string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs
)
@@ -162,7 +172,7 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
Returns:
The ExampleSelector instantiated, backed by a vector store.
"""
string_examples = [cls._example_to_text(eg, input_keys) for eg in examples]
string_examples = [_example_to_text(eg, input_keys) for eg in examples]
vectorstore = await vectorstore_cls.afrom_texts(
string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs
)