community[patch]: update for compatibility with latest Meilisearch version (#18970)

- **Description:** Updates Meilisearch vectorstore for compatibility
with v1.6 and above. Adds embedders settings and embedder_name which are
now required.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
CaroFG
2024-03-27 22:08:27 +00:00
committed by GitHub
parent be2adb1083
commit cf96060ab7
3 changed files with 107 additions and 21 deletions

View File

@@ -65,8 +65,15 @@ class Meilisearch(VectorStore):
# api_key is optional; provide it if your meilisearch instance requires it
client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***')
embeddings = OpenAIEmbeddings()
embedders = {
"theEmbedderName": {
"source": "userProvided",
"dimensions": "1536"
}
}
vectorstore = Meilisearch(
embedding=embeddings,
embedders=embedders,
client=client,
index_name='langchain_demo',
text_key='text')
@@ -81,6 +88,8 @@ class Meilisearch(VectorStore):
index_name: str = "langchain-demo",
text_key: str = "text",
metadata_key: str = "metadata",
*,
embedders: Optional[Dict[str, Any]] = None,
):
"""Initialize with Meilisearch client."""
client = _create_client(client=client, url=url, api_key=api_key)
@@ -90,18 +99,24 @@ class Meilisearch(VectorStore):
self._embedding = embedding
self._text_key = text_key
self._metadata_key = metadata_key
self._embedders = embedders
self._embedders_settings = self._client.index(
str(self._index_name)
).update_embedders(embedders)
def add_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> List[str]:
"""Run more texts through the embedding and add them to the vector store.
Args:
texts (Iterable[str]): Iterable of strings/text to add to the vectorstore.
embedder_name: Name of the embedder. Defaults to "default".
metadatas (Optional[List[dict]]): Optional list of metadata.
Defaults to None.
ids Optional[List[str]]: Optional list of IDs.
@@ -128,7 +143,7 @@ class Meilisearch(VectorStore):
docs.append(
{
"id": id,
"_vectors": embedding,
"_vectors": {f"{embedder_name}": embedding},
f"{self._metadata_key}": metadata,
}
)
@@ -142,12 +157,14 @@ class Meilisearch(VectorStore):
query: str,
k: int = 4,
filter: Optional[Dict[str, str]] = None,
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> List[Document]:
"""Return meilisearch documents most similar to the query.
Args:
query (str): Query text for which to find similar documents.
embedder_name: Name of the embedder to be used. Defaults to "default".
k (int): Number of documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None.
@@ -158,6 +175,7 @@ class Meilisearch(VectorStore):
"""
docs_and_scores = self.similarity_search_with_score(
query=query,
embedder_name=embedder_name,
k=k,
filter=filter,
kwargs=kwargs,
@@ -169,12 +187,14 @@ class Meilisearch(VectorStore):
query: str,
k: int = 4,
filter: Optional[Dict[str, str]] = None,
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return meilisearch documents most similar to the query, along with scores.
Args:
query (str): Query text for which to find similar documents.
embedder_name: Name of the embedder to be used. Defaults to "default".
k (int): Number of documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None.
@@ -187,6 +207,7 @@ class Meilisearch(VectorStore):
docs = self.similarity_search_by_vector_with_scores(
embedding=_query,
embedder_name=embedder_name,
k=k,
filter=filter,
kwargs=kwargs,
@@ -196,6 +217,7 @@ class Meilisearch(VectorStore):
def similarity_search_by_vector_with_scores(
self,
embedding: List[float],
embedder_name: Optional[str] = "default",
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
**kwargs: Any,
@@ -204,6 +226,7 @@ class Meilisearch(VectorStore):
Args:
embedding (List[float]): Embedding to look up similar documents.
embedder_name: Name of the embedder to be used. Defaults to "default".
k (int): Number of documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None.
@@ -214,7 +237,13 @@ class Meilisearch(VectorStore):
"""
docs = []
results = self._client.index(str(self._index_name)).search(
"", {"vector": embedding, "limit": k, "filter": filter}
"",
{
"vector": embedding,
"hybrid": {"semanticRatio": 1.0, "embedder": embedder_name},
"limit": k,
"filter": filter,
},
)
for result in results["hits"]:
@@ -233,12 +262,14 @@ class Meilisearch(VectorStore):
embedding: List[float],
k: int = 4,
filter: Optional[Dict[str, str]] = None,
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> List[Document]:
"""Return meilisearch documents most similar to embedding vector.
Args:
embedding (List[float]): Embedding to look up similar documents.
embedder_name: Name of the embedder to be used. Defaults to "default".
k (int): Number of documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None.
@@ -249,6 +280,7 @@ class Meilisearch(VectorStore):
"""
docs = self.similarity_search_by_vector_with_scores(
embedding=embedding,
embedder_name=embedder_name,
k=k,
filter=filter,
kwargs=kwargs,
@@ -268,6 +300,8 @@ class Meilisearch(VectorStore):
ids: Optional[List[str]] = None,
text_key: Optional[str] = "text",
metadata_key: Optional[str] = "metadata",
embedders: Dict[str, Any] = {},
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> Meilisearch:
"""Construct Meilisearch wrapper from raw documents.
@@ -288,21 +322,25 @@ class Meilisearch(VectorStore):
# The environment should be the one specified next to the API key
# in your Meilisearch console
client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***')
embeddings = OpenAIEmbeddings()
embedding = OpenAIEmbeddings()
embedders: Embedders index setting.
embedder_name: Name of the embedder. Defaults to "default".
docsearch = Meilisearch.from_texts(
client=client,
embeddings=embeddings,
embedding=embedding,
)
"""
client = _create_client(client=client, url=url, api_key=api_key)
vectorstore = cls(
embedding=embedding,
embedders=embedders,
client=client,
index_name=index_name,
)
vectorstore.add_texts(
texts=texts,
embedder_name=embedder_name,
metadatas=metadatas,
ids=ids,
text_key=text_key,