mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-02 01:23:07 +00:00
add return of ids (#254)
Not actually sure the desired return value of add_example in the example selector is general/good - what's the use case?
This commit is contained in:
parent
8cba5b791a
commit
2163d064f3
@ -1,13 +1,13 @@
|
|||||||
"""Interface for selecting examples to include in prompts."""
|
"""Interface for selecting examples to include in prompts."""
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
|
||||||
class BaseExampleSelector(ABC):
|
class BaseExampleSelector(ABC):
|
||||||
"""Interface for selecting examples to include in prompts."""
|
"""Interface for selecting examples to include in prompts."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def add_example(self, example: Dict[str, str]) -> None:
|
def add_example(self, example: Dict[str, str]) -> Any:
|
||||||
"""Add new example to store for a key."""
|
"""Add new example to store for a key."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
@ -31,10 +31,11 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
|
|||||||
extra = Extra.forbid
|
extra = Extra.forbid
|
||||||
arbitrary_types_allowed = True
|
arbitrary_types_allowed = True
|
||||||
|
|
||||||
def add_example(self, example: Dict[str, str]) -> None:
|
def add_example(self, example: Dict[str, str]) -> str:
|
||||||
"""Add new example to vectorstore."""
|
"""Add new example to vectorstore."""
|
||||||
string_example = " ".join(sorted_values(example))
|
string_example = " ".join(sorted_values(example))
|
||||||
self.vectorstore.add_texts([string_example], metadatas=[example])
|
ids = self.vectorstore.add_texts([string_example], metadatas=[example])
|
||||||
|
return ids[0]
|
||||||
|
|
||||||
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
|
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
|
||||||
"""Select which examples to use based on semantic similarity."""
|
"""Select which examples to use based on semantic similarity."""
|
||||||
|
@ -14,8 +14,16 @@ class VectorStore(ABC):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
def add_texts(
|
def add_texts(
|
||||||
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
||||||
) -> None:
|
) -> List[str]:
|
||||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
"""Run more texts through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
texts: Iterable of strings to add to the vectorstore.
|
||||||
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of ids from adding the texts into the vectorstore.
|
||||||
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
||||||
|
@ -55,7 +55,7 @@ class ElasticVectorSearch(VectorStore):
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Could not import elasticsearch python package. "
|
"Could not import elasticsearch python package. "
|
||||||
"Please install it with `pip install elasticearch`."
|
"Please install it with `pip install elasticsearch`."
|
||||||
)
|
)
|
||||||
self.embedding_function = embedding_function
|
self.embedding_function = embedding_function
|
||||||
self.index_name = index_name
|
self.index_name = index_name
|
||||||
@ -69,29 +69,42 @@ class ElasticVectorSearch(VectorStore):
|
|||||||
|
|
||||||
def add_texts(
|
def add_texts(
|
||||||
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
||||||
) -> None:
|
) -> List[str]:
|
||||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
"""Run more texts through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
texts: Iterable of strings to add to the vectorstore.
|
||||||
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of ids from adding the texts into the vectorstore.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
from elasticsearch.helpers import bulk
|
from elasticsearch.helpers import bulk
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Could not import elasticsearch python package. "
|
"Could not import elasticsearch python package. "
|
||||||
"Please install it with `pip install elasticearch`."
|
"Please install it with `pip install elasticsearch`."
|
||||||
)
|
)
|
||||||
requests = []
|
requests = []
|
||||||
|
ids = []
|
||||||
for i, text in enumerate(texts):
|
for i, text in enumerate(texts):
|
||||||
metadata = metadatas[i] if metadatas else {}
|
metadata = metadatas[i] if metadatas else {}
|
||||||
|
_id = str(uuid.uuid4())
|
||||||
request = {
|
request = {
|
||||||
"_op_type": "index",
|
"_op_type": "index",
|
||||||
"_index": self.index_name,
|
"_index": self.index_name,
|
||||||
"vector": self.embedding_function(text),
|
"vector": self.embedding_function(text),
|
||||||
"text": text,
|
"text": text,
|
||||||
"metadata": metadata,
|
"metadata": metadata,
|
||||||
|
"_id": _id,
|
||||||
}
|
}
|
||||||
|
ids.append(_id)
|
||||||
requests.append(request)
|
requests.append(request)
|
||||||
bulk(self.client, requests)
|
bulk(self.client, requests)
|
||||||
# TODO: add option not to refresh
|
# TODO: add option not to refresh
|
||||||
self.client.indices.refresh(index=self.index_name)
|
self.client.indices.refresh(index=self.index_name)
|
||||||
|
return ids
|
||||||
|
|
||||||
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
||||||
"""Return docs most similar to query.
|
"""Return docs most similar to query.
|
||||||
|
@ -41,8 +41,16 @@ class FAISS(VectorStore):
|
|||||||
|
|
||||||
def add_texts(
|
def add_texts(
|
||||||
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
|
||||||
) -> None:
|
) -> List[str]:
|
||||||
"""Run more texts through the embeddings and add to the vectorstore."""
|
"""Run more texts through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
texts: Iterable of strings to add to the vectorstore.
|
||||||
|
metadatas: Optional list of metadatas associated with the texts.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of ids from adding the texts into the vectorstore.
|
||||||
|
"""
|
||||||
if not isinstance(self.docstore, AddableMixin):
|
if not isinstance(self.docstore, AddableMixin):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"If trying to add texts, the underlying docstore should support "
|
"If trying to add texts, the underlying docstore should support "
|
||||||
@ -66,6 +74,7 @@ class FAISS(VectorStore):
|
|||||||
self.docstore.add({_id: doc for _, _id, doc in full_info})
|
self.docstore.add({_id: doc for _, _id, doc in full_info})
|
||||||
index_to_id = {index: _id for index, _id, _ in full_info}
|
index_to_id = {index: _id for index, _id, _ in full_info}
|
||||||
self.index_to_docstore_id.update(index_to_id)
|
self.index_to_docstore_id.update(index_to_id)
|
||||||
|
return [_id for _, _id, _ in full_info]
|
||||||
|
|
||||||
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
def similarity_search(self, query: str, k: int = 4) -> List[Document]:
|
||||||
"""Return docs most similar to query.
|
"""Return docs most similar to query.
|
||||||
|
Loading…
Reference in New Issue
Block a user