mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-01 09:04:03 +00:00
add return of ids (#254)
Not sure the desired return value of add_example in the example selector is general/good - what's the use case?
This commit is contained in:
parent
8cba5b791a
commit
2163d064f3
@ -1,13 +1,13 @@
|
||||
"""Interface for selecting examples to include in prompts."""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
class BaseExampleSelector(ABC):
|
||||
"""Interface for selecting examples to include in prompts."""
|
||||
|
||||
@abstractmethod
|
||||
def add_example(self, example: Dict[str, str]) -> None:
|
||||
def add_example(self, example: Dict[str, str]) -> Any:
|
||||
"""Add new example to store for a key."""
|
||||
|
||||
@abstractmethod
|
||||
|
@ -31,10 +31,11 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
|
||||
extra = Extra.forbid
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
def add_example(self, example: Dict[str, str]) -> None:
|
||||
def add_example(self, example: Dict[str, str]) -> str:
|
||||
"""Add new example to vectorstore."""
|
||||
string_example = " ".join(sorted_values(example))
|
||||
self.vectorstore.add_texts([string_example], metadatas=[example])
|
||||
ids = self.vectorstore.add_texts([string_example], metadatas=[example])
|
||||
return ids[0]
|
||||
|
||||
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
|
||||
"""Select which examples to use based on semantic similarity."""
|
||||
|
@ -14,8 +14,16 @@ class VectorStore(ABC):
|
||||
@abstractmethod
|
||||
def add_texts(
|
||||
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
||||
) -> None:
|
||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
texts: Iterable of strings to add to the vectorstore.
|
||||
metadatas: Optional list of metadatas associated with the texts.
|
||||
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
||||
|
@ -55,7 +55,7 @@ class ElasticVectorSearch(VectorStore):
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
"Could not import elasticsearch python package. "
|
||||
"Please install it with `pip install elasticearch`."
|
||||
"Please install it with `pip install elasticsearch`."
|
||||
)
|
||||
self.embedding_function = embedding_function
|
||||
self.index_name = index_name
|
||||
@ -69,29 +69,42 @@ class ElasticVectorSearch(VectorStore):
|
||||
|
||||
def add_texts(
|
||||
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
||||
) -> None:
|
||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
texts: Iterable of strings to add to the vectorstore.
|
||||
metadatas: Optional list of metadatas associated with the texts.
|
||||
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
try:
|
||||
from elasticsearch.helpers import bulk
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
"Could not import elasticsearch python package. "
|
||||
"Please install it with `pip install elasticearch`."
|
||||
"Please install it with `pip install elasticsearch`."
|
||||
)
|
||||
requests = []
|
||||
ids = []
|
||||
for i, text in enumerate(texts):
|
||||
metadata = metadatas[i] if metadatas else {}
|
||||
_id = str(uuid.uuid4())
|
||||
request = {
|
||||
"_op_type": "index",
|
||||
"_index": self.index_name,
|
||||
"vector": self.embedding_function(text),
|
||||
"text": text,
|
||||
"metadata": metadata,
|
||||
"_id": _id,
|
||||
}
|
||||
ids.append(_id)
|
||||
requests.append(request)
|
||||
bulk(self.client, requests)
|
||||
# TODO: add option not to refresh
|
||||
self.client.indices.refresh(index=self.index_name)
|
||||
return ids
|
||||
|
||||
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
||||
"""Return docs most similar to query.
|
||||
|
@ -41,8 +41,16 @@ class FAISS(VectorStore):
|
||||
|
||||
def add_texts(
|
||||
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
||||
) -> None:
|
||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore.
|
||||
|
||||
Args:
|
||||
texts: Iterable of strings to add to the vectorstore.
|
||||
metadatas: Optional list of metadatas associated with the texts.
|
||||
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
if not isinstance(self.docstore, AddableMixin):
|
||||
raise ValueError(
|
||||
"If trying to add texts, the underlying docstore should support "
|
||||
@ -66,6 +74,7 @@ class FAISS(VectorStore):
|
||||
self.docstore.add({_id: doc for _, _id, doc in full_info})
|
||||
index_to_id = {index: _id for index, _id, _ in full_info}
|
||||
self.index_to_docstore_id.update(index_to_id)
|
||||
return [_id for _, _id, _ in full_info]
|
||||
|
||||
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
||||
"""Return docs most similar to query.
|
||||
|
Loading…
Reference in New Issue
Block a user