mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-14 15:16:21 +00:00
community[patch]: update for compatibility with latest Meilisearch version (#18970)
- **Description:** Updates the Meilisearch vector store for compatibility with Meilisearch v1.6 and above. Adds the `embedders` index settings and the `embedder_name` argument, both of which are now required. --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
be2adb1083
commit
cf96060ab7
@ -130,7 +130,14 @@
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"embedders = {\n",
|
||||
" \"default\": {\n",
|
||||
" \"source\": \"userProvided\",\n",
|
||||
" \"dimensions\": 1536,\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"embedder_name = \"default\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -152,7 +159,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use Meilisearch vector store to store texts & associated embeddings as vector\n",
|
||||
"vector_store = Meilisearch.from_texts(texts=texts, embedding=embeddings)"
|
||||
"vector_store = Meilisearch.from_texts(\n",
|
||||
" texts=texts, embedding=embeddings, embedders=embedders, embedder_name=embedder_name\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -188,11 +197,16 @@
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"# Import documents & embeddings in the vector store\n",
|
||||
"vector_store = Meilisearch.from_documents(documents=documents, embedding=embeddings)\n",
|
||||
"vector_store = Meilisearch.from_documents(\n",
|
||||
" documents=documents,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" embedders=embedders,\n",
|
||||
" embedder_name=embedder_name,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Search in our vector store\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vector_store.similarity_search(query)\n",
|
||||
"docs = vector_store.similarity_search(query, embedder_name=embedder_name)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
@ -221,7 +235,11 @@
|
||||
"\n",
|
||||
"client = meilisearch.Client(url=\"http://127.0.0.1:7700\", api_key=\"***\")\n",
|
||||
"vector_store = Meilisearch(\n",
|
||||
" embedding=embeddings, client=client, index_name=\"langchain_demo\", text_key=\"text\"\n",
|
||||
" embedding=embeddings,\n",
|
||||
" embedders=embedders,\n",
|
||||
" client=client,\n",
|
||||
" index_name=\"langchain_demo\",\n",
|
||||
" text_key=\"text\",\n",
|
||||
")\n",
|
||||
"vector_store.add_documents(documents)"
|
||||
]
|
||||
@ -232,7 +250,7 @@
|
||||
"source": [
|
||||
"## Similarity Search with score\n",
|
||||
"\n",
|
||||
"This specific method allows you to return the documents and the distance score of the query to them."
|
||||
"This method returns the documents together with the distance score between the query and each of them. `embedder_name` is the name of the embedder to use for semantic search; it defaults to \"default\"."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -241,7 +259,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_and_scores = vector_store.similarity_search_with_score(query)\n",
|
||||
"docs_and_scores = vector_store.similarity_search_with_score(\n",
|
||||
" query, embedder_name=embedder_name\n",
|
||||
")\n",
|
||||
"docs_and_scores[0]"
|
||||
]
|
||||
},
|
||||
@ -249,7 +269,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity Search by vector"
|
||||
"## Similarity Search by vector\n",
|
||||
"`embedder_name` is the name of the embedder to use for semantic search; it defaults to \"default\"."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -259,7 +280,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embedding_vector = embeddings.embed_query(query)\n",
|
||||
"docs_and_scores = vector_store.similarity_search_by_vector(embedding_vector)\n",
|
||||
"docs_and_scores = vector_store.similarity_search_by_vector(\n",
|
||||
" embedding_vector, embedder_name=embedder_name\n",
|
||||
")\n",
|
||||
"docs_and_scores[0]"
|
||||
]
|
||||
},
|
||||
|
@ -65,8 +65,15 @@ class Meilisearch(VectorStore):
|
||||
# api_key is optional; provide it if your meilisearch instance requires it
|
||||
client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***')
|
||||
embeddings = OpenAIEmbeddings()
|
||||
embedders = {
|
||||
"theEmbedderName": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 1536
|
||||
}
|
||||
}
|
||||
vectorstore = Meilisearch(
|
||||
embedding=embeddings,
|
||||
embedders=embedders,
|
||||
client=client,
|
||||
index_name='langchain_demo',
|
||||
text_key='text')
|
||||
@ -81,6 +88,8 @@ class Meilisearch(VectorStore):
|
||||
index_name: str = "langchain-demo",
|
||||
text_key: str = "text",
|
||||
metadata_key: str = "metadata",
|
||||
*,
|
||||
embedders: Optional[Dict[str, Any]] = None,
|
||||
):
|
||||
"""Initialize with Meilisearch client."""
|
||||
client = _create_client(client=client, url=url, api_key=api_key)
|
||||
@ -90,18 +99,24 @@ class Meilisearch(VectorStore):
|
||||
self._embedding = embedding
|
||||
self._text_key = text_key
|
||||
self._metadata_key = metadata_key
|
||||
self._embedders = embedders
|
||||
self._embedders_settings = self._client.index(
|
||||
str(self._index_name)
|
||||
).update_embedders(embedders)
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
embedder_name: Optional[str] = "default",
|
||||
**kwargs: Any,
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embedding and add them to the vector store.
|
||||
|
||||
Args:
|
||||
texts (Iterable[str]): Iterable of strings/text to add to the vectorstore.
|
||||
embedder_name (Optional[str]): Name of the embedder. Defaults to "default".
|
||||
metadatas (Optional[List[dict]]): Optional list of metadata.
|
||||
Defaults to None.
|
||||
ids (Optional[List[str]]): Optional list of IDs.
|
||||
@ -128,7 +143,7 @@ class Meilisearch(VectorStore):
|
||||
docs.append(
|
||||
{
|
||||
"id": id,
|
||||
"_vectors": embedding,
|
||||
"_vectors": {f"{embedder_name}": embedding},
|
||||
f"{self._metadata_key}": metadata,
|
||||
}
|
||||
)
|
||||
@ -142,12 +157,14 @@ class Meilisearch(VectorStore):
|
||||
query: str,
|
||||
k: int = 4,
|
||||
filter: Optional[Dict[str, str]] = None,
|
||||
embedder_name: Optional[str] = "default",
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return meilisearch documents most similar to the query.
|
||||
|
||||
Args:
|
||||
query (str): Query text for which to find similar documents.
|
||||
embedder_name (Optional[str]): Embedder to use. Defaults to "default".
|
||||
k (int): Number of documents to return. Defaults to 4.
|
||||
filter (Optional[Dict[str, str]]): Filter by metadata.
|
||||
Defaults to None.
|
||||
@ -158,6 +175,7 @@ class Meilisearch(VectorStore):
|
||||
"""
|
||||
docs_and_scores = self.similarity_search_with_score(
|
||||
query=query,
|
||||
embedder_name=embedder_name,
|
||||
k=k,
|
||||
filter=filter,
|
||||
kwargs=kwargs,
|
||||
@ -169,12 +187,14 @@ class Meilisearch(VectorStore):
|
||||
query: str,
|
||||
k: int = 4,
|
||||
filter: Optional[Dict[str, str]] = None,
|
||||
embedder_name: Optional[str] = "default",
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Return meilisearch documents most similar to the query, along with scores.
|
||||
|
||||
Args:
|
||||
query (str): Query text for which to find similar documents.
|
||||
embedder_name (Optional[str]): Embedder to use. Defaults to "default".
|
||||
k (int): Number of documents to return. Defaults to 4.
|
||||
filter (Optional[Dict[str, str]]): Filter by metadata.
|
||||
Defaults to None.
|
||||
@ -187,6 +207,7 @@ class Meilisearch(VectorStore):
|
||||
|
||||
docs = self.similarity_search_by_vector_with_scores(
|
||||
embedding=_query,
|
||||
embedder_name=embedder_name,
|
||||
k=k,
|
||||
filter=filter,
|
||||
kwargs=kwargs,
|
||||
@ -196,6 +217,7 @@ class Meilisearch(VectorStore):
|
||||
def similarity_search_by_vector_with_scores(
|
||||
self,
|
||||
embedding: List[float],
|
||||
embedder_name: Optional[str] = "default",
|
||||
k: int = 4,
|
||||
filter: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
@ -204,6 +226,7 @@ class Meilisearch(VectorStore):
|
||||
|
||||
Args:
|
||||
embedding (List[float]): Embedding to look up similar documents.
|
||||
embedder_name (Optional[str]): Embedder to use. Defaults to "default".
|
||||
k (int): Number of documents to return. Defaults to 4.
|
||||
filter (Optional[Dict[str, str]]): Filter by metadata.
|
||||
Defaults to None.
|
||||
@ -214,7 +237,13 @@ class Meilisearch(VectorStore):
|
||||
"""
|
||||
docs = []
|
||||
results = self._client.index(str(self._index_name)).search(
|
||||
"", {"vector": embedding, "limit": k, "filter": filter}
|
||||
"",
|
||||
{
|
||||
"vector": embedding,
|
||||
"hybrid": {"semanticRatio": 1.0, "embedder": embedder_name},
|
||||
"limit": k,
|
||||
"filter": filter,
|
||||
},
|
||||
)
|
||||
|
||||
for result in results["hits"]:
|
||||
@ -233,12 +262,14 @@ class Meilisearch(VectorStore):
|
||||
embedding: List[float],
|
||||
k: int = 4,
|
||||
filter: Optional[Dict[str, str]] = None,
|
||||
embedder_name: Optional[str] = "default",
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return meilisearch documents most similar to embedding vector.
|
||||
|
||||
Args:
|
||||
embedding (List[float]): Embedding to look up similar documents.
|
||||
embedder_name (Optional[str]): Embedder to use. Defaults to "default".
|
||||
k (int): Number of documents to return. Defaults to 4.
|
||||
filter (Optional[Dict[str, str]]): Filter by metadata.
|
||||
Defaults to None.
|
||||
@ -249,6 +280,7 @@ class Meilisearch(VectorStore):
|
||||
"""
|
||||
docs = self.similarity_search_by_vector_with_scores(
|
||||
embedding=embedding,
|
||||
embedder_name=embedder_name,
|
||||
k=k,
|
||||
filter=filter,
|
||||
kwargs=kwargs,
|
||||
@ -268,6 +300,8 @@ class Meilisearch(VectorStore):
|
||||
ids: Optional[List[str]] = None,
|
||||
text_key: Optional[str] = "text",
|
||||
metadata_key: Optional[str] = "metadata",
|
||||
embedders: Dict[str, Any] = {},
|
||||
embedder_name: Optional[str] = "default",
|
||||
**kwargs: Any,
|
||||
) -> Meilisearch:
|
||||
"""Construct Meilisearch wrapper from raw documents.
|
||||
@ -288,21 +322,25 @@ class Meilisearch(VectorStore):
|
||||
# The environment should be the one specified next to the API key
|
||||
# in your Meilisearch console
|
||||
client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***')
|
||||
embeddings = OpenAIEmbeddings()
|
||||
embedding = OpenAIEmbeddings()
|
||||
# embedders: Embedders index setting.
|
||||
# embedder_name: Name of the embedder. Defaults to "default".
|
||||
docsearch = Meilisearch.from_texts(
|
||||
client=client,
|
||||
embeddings=embeddings,
|
||||
embedding=embedding,
|
||||
)
|
||||
"""
|
||||
client = _create_client(client=client, url=url, api_key=api_key)
|
||||
|
||||
vectorstore = cls(
|
||||
embedding=embedding,
|
||||
embedders=embedders,
|
||||
client=client,
|
||||
index_name=index_name,
|
||||
)
|
||||
vectorstore.add_texts(
|
||||
texts=texts,
|
||||
embedder_name=embedder_name,
|
||||
metadatas=metadatas,
|
||||
ids=ids,
|
||||
text_key=text_key,
|
||||
|
@ -1,5 +1,6 @@
|
||||
"""Test Meilisearch functionality."""
|
||||
from typing import TYPE_CHECKING, Generator
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, Generator
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
@ -33,6 +34,16 @@ class TestMeilisearchVectorSearch:
|
||||
timeout=10,
|
||||
)
|
||||
|
||||
@pytest.fixture
|
||||
def new_embedders(self) -> Dict[str, Dict[str, Any]]:
|
||||
return {
|
||||
"default": {
|
||||
"source": "userProvided",
|
||||
# Dimension defined in FakeEmbeddings as [float(1.0)] * 9 + [float(0.0)]
|
||||
"dimensions": 10,
|
||||
}
|
||||
}
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(self) -> None:
|
||||
self.delete_all_indexes()
|
||||
@ -63,12 +74,14 @@ class TestMeilisearchVectorSearch:
|
||||
# Wait for the last task to be completed
|
||||
client.wait_for_task(tasks.results[0].uid)
|
||||
|
||||
def test_meilisearch(self) -> None:
|
||||
def test_meilisearch(self, new_embedders: Dict[str, Any]) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
vectorstore = Meilisearch.from_texts(
|
||||
texts=texts,
|
||||
embedding=FakeEmbeddings(),
|
||||
embedders=new_embedders,
|
||||
embedder_name=list(new_embedders)[0],
|
||||
url=TEST_MEILI_HTTP_ADDR,
|
||||
api_key=TEST_MEILI_MASTER_KEY,
|
||||
index_name=INDEX_NAME,
|
||||
@ -77,12 +90,14 @@ class TestMeilisearchVectorSearch:
|
||||
output = vectorstore.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
def test_meilisearch_with_client(self) -> None:
|
||||
def test_meilisearch_with_client(self, new_embedders: Dict[str, Any]) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
vectorstore = Meilisearch.from_texts(
|
||||
texts=texts,
|
||||
embedding=FakeEmbeddings(),
|
||||
embedders=new_embedders,
|
||||
embedder_name=list(new_embedders)[0],
|
||||
client=self.client(),
|
||||
index_name=INDEX_NAME,
|
||||
)
|
||||
@ -90,13 +105,15 @@ class TestMeilisearchVectorSearch:
|
||||
output = vectorstore.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
def test_meilisearch_with_metadatas(self) -> None:
|
||||
def test_meilisearch_with_metadatas(self, new_embedders: Dict[str, Any]) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = Meilisearch.from_texts(
|
||||
texts=texts,
|
||||
embedding=FakeEmbeddings(),
|
||||
embedders=new_embedders,
|
||||
embedder_name=list(new_embedders)[0],
|
||||
url=TEST_MEILI_HTTP_ADDR,
|
||||
api_key=TEST_MEILI_MASTER_KEY,
|
||||
index_name=INDEX_NAME,
|
||||
@ -109,13 +126,17 @@ class TestMeilisearchVectorSearch:
|
||||
assert output[0].metadata["page"] == 0
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
|
||||
def test_meilisearch_with_metadatas_with_scores(self) -> None:
|
||||
def test_meilisearch_with_metadatas_with_scores(
|
||||
self, new_embedders: Dict[str, Any]
|
||||
) -> None:
|
||||
"""Test end to end construction and scored search."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": str(i)} for i in range(len(texts))]
|
||||
docsearch = Meilisearch.from_texts(
|
||||
texts=texts,
|
||||
embedding=FakeEmbeddings(),
|
||||
embedders=new_embedders,
|
||||
embedder_name=list(new_embedders)[0],
|
||||
url=TEST_MEILI_HTTP_ADDR,
|
||||
api_key=TEST_MEILI_MASTER_KEY,
|
||||
index_name=INDEX_NAME,
|
||||
@ -123,9 +144,11 @@ class TestMeilisearchVectorSearch:
|
||||
)
|
||||
self._wait_last_task()
|
||||
output = docsearch.similarity_search_with_score("foo", k=1)
|
||||
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 9.0)]
|
||||
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 1.0)]
|
||||
|
||||
def test_meilisearch_with_metadatas_with_scores_using_vector(self) -> None:
|
||||
def test_meilisearch_with_metadatas_with_scores_using_vector(
|
||||
self, new_embedders: Dict[str, Any]
|
||||
) -> None:
|
||||
"""Test end to end construction and scored search, using embedding vector."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": str(i)} for i in range(len(texts))]
|
||||
@ -134,6 +157,8 @@ class TestMeilisearchVectorSearch:
|
||||
docsearch = Meilisearch.from_texts(
|
||||
texts=texts,
|
||||
embedding=FakeEmbeddings(),
|
||||
embedders=new_embedders,
|
||||
embedder_name=list(new_embedders)[0],
|
||||
url=TEST_MEILI_HTTP_ADDR,
|
||||
api_key=TEST_MEILI_MASTER_KEY,
|
||||
index_name=INDEX_NAME,
|
||||
@ -144,4 +169,4 @@ class TestMeilisearchVectorSearch:
|
||||
output = docsearch.similarity_search_by_vector_with_scores(
|
||||
embedding=embedded_query, k=1
|
||||
)
|
||||
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 9.0)]
|
||||
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 1.0)]
|
||||
|
Loading…
Reference in New Issue
Block a user