diff --git a/docs/docs/integrations/vectorstores/meilisearch.ipynb b/docs/docs/integrations/vectorstores/meilisearch.ipynb index 11777cceda8..58ed11a5681 100644 --- a/docs/docs/integrations/vectorstores/meilisearch.ipynb +++ b/docs/docs/integrations/vectorstores/meilisearch.ipynb @@ -130,7 +130,14 @@ "from langchain_openai import OpenAIEmbeddings\n", "from langchain_text_splitters import CharacterTextSplitter\n", "\n", - "embeddings = OpenAIEmbeddings()" + "embeddings = OpenAIEmbeddings()\n", + "embedders = {\n", + " \"default\": {\n", + " \"source\": \"userProvided\",\n", + " \"dimensions\": 1536,\n", + " }\n", + "}\n", + "embedder_name = \"default\"" ] }, { @@ -152,7 +159,9 @@ "outputs": [], "source": [ "# Use Meilisearch vector store to store texts & associated embeddings as vector\n", - "vector_store = Meilisearch.from_texts(texts=texts, embedding=embeddings)" + "vector_store = Meilisearch.from_texts(\n", + " texts=texts, embedding=embeddings, embedders=embedders, embedder_name=embedder_name\n", + ")" ] }, { @@ -188,11 +197,16 @@ "docs = text_splitter.split_documents(documents)\n", "\n", "# Import documents & embeddings in the vector store\n", - "vector_store = Meilisearch.from_documents(documents=documents, embedding=embeddings)\n", + "vector_store = Meilisearch.from_documents(\n", + " documents=documents,\n", + " embedding=embeddings,\n", + " embedders=embedders,\n", + " embedder_name=embedder_name,\n", + ")\n", "\n", "# Search in our vector store\n", "query = \"What did the president say about Ketanji Brown Jackson\"\n", - "docs = vector_store.similarity_search(query)\n", + "docs = vector_store.similarity_search(query, embedder_name=embedder_name)\n", "print(docs[0].page_content)" ] }, @@ -221,7 +235,11 @@ "\n", "client = meilisearch.Client(url=\"http://127.0.0.1:7700\", api_key=\"***\")\n", "vector_store = Meilisearch(\n", - " embedding=embeddings, client=client, index_name=\"langchain_demo\", text_key=\"text\"\n", + " embedding=embeddings,\n", + " 
embedders=embedders,\n", + " client=client,\n", + " index_name=\"langchain_demo\",\n", + " text_key=\"text\",\n", ")\n", "vector_store.add_documents(documents)" ] @@ -232,7 +250,7 @@ "source": [ "## Similarity Search with score\n", "\n", - "This specific method allows you to return the documents and the distance score of the query to them." + "This specific method allows you to return the documents and the distance score of the query to them. `embedder_name` is the name of the embedder to use for semantic search; it defaults to \"default\"." ] }, { @@ -241,7 +259,9 @@ "metadata": {}, "outputs": [], "source": [ - "docs_and_scores = vector_store.similarity_search_with_score(query)\n", + "docs_and_scores = vector_store.similarity_search_with_score(\n", + " query, embedder_name=embedder_name\n", + ")\n", "docs_and_scores[0]" ] }, @@ -249,7 +269,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Similarity Search by vector" + "## Similarity Search by vector\n", + "`embedder_name` is the name of the embedder to use for semantic search; it defaults to \"default\"." 
] }, { @@ -259,7 +280,9 @@ "outputs": [], "source": [ "embedding_vector = embeddings.embed_query(query)\n", - "docs_and_scores = vector_store.similarity_search_by_vector(embedding_vector)\n", + "docs_and_scores = vector_store.similarity_search_by_vector(\n", + " embedding_vector, embedder_name=embedder_name\n", + ")\n", "docs_and_scores[0]" ] }, diff --git a/libs/community/langchain_community/vectorstores/meilisearch.py b/libs/community/langchain_community/vectorstores/meilisearch.py index b34a990cce2..522f107405d 100644 --- a/libs/community/langchain_community/vectorstores/meilisearch.py +++ b/libs/community/langchain_community/vectorstores/meilisearch.py @@ -65,8 +65,15 @@ class Meilisearch(VectorStore): # api_key is optional; provide it if your meilisearch instance requires it client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***') embeddings = OpenAIEmbeddings() + embedders = { + "theEmbedderName": { + "source": "userProvided", + "dimensions": 1536 + } + } vectorstore = Meilisearch( embedding=embeddings, + embedders=embedders, client=client, index_name='langchain_demo', text_key='text') @@ -81,6 +88,8 @@ class Meilisearch(VectorStore): index_name: str = "langchain-demo", text_key: str = "text", metadata_key: str = "metadata", + *, + embedders: Optional[Dict[str, Any]] = None, ): """Initialize with Meilisearch client.""" client = _create_client(client=client, url=url, api_key=api_key) @@ -90,18 +99,24 @@ class Meilisearch(VectorStore): self._embedding = embedding self._text_key = text_key self._metadata_key = metadata_key + self._embedders = embedders + self._embedders_settings = self._client.index( + str(self._index_name) + ).update_embedders(embedders) def add_texts( self, texts: Iterable[str], metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, + embedder_name: Optional[str] = "default", **kwargs: Any, ) -> List[str]: """Run more texts through the embedding and add them to the vector store. 
Args: texts (Iterable[str]): Iterable of strings/text to add to the vectorstore. + embedder_name: Name of the embedder. Defaults to "default". metadatas (Optional[List[dict]]): Optional list of metadata. Defaults to None. ids Optional[List[str]]: Optional list of IDs. @@ -128,7 +143,7 @@ class Meilisearch(VectorStore): docs.append( { "id": id, - "_vectors": embedding, + "_vectors": {f"{embedder_name}": embedding}, f"{self._metadata_key}": metadata, } ) @@ -142,12 +157,14 @@ class Meilisearch(VectorStore): query: str, k: int = 4, filter: Optional[Dict[str, str]] = None, + embedder_name: Optional[str] = "default", **kwargs: Any, ) -> List[Document]: """Return meilisearch documents most similar to the query. Args: query (str): Query text for which to find similar documents. + embedder_name: Name of the embedder to be used. Defaults to "default". k (int): Number of documents to return. Defaults to 4. filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. @@ -158,6 +175,7 @@ class Meilisearch(VectorStore): """ docs_and_scores = self.similarity_search_with_score( query=query, + embedder_name=embedder_name, k=k, filter=filter, kwargs=kwargs, @@ -169,12 +187,14 @@ class Meilisearch(VectorStore): query: str, k: int = 4, filter: Optional[Dict[str, str]] = None, + embedder_name: Optional[str] = "default", **kwargs: Any, ) -> List[Tuple[Document, float]]: """Return meilisearch documents most similar to the query, along with scores. Args: query (str): Query text for which to find similar documents. + embedder_name: Name of the embedder to be used. Defaults to "default". k (int): Number of documents to return. Defaults to 4. filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. 
@@ -187,6 +207,7 @@ class Meilisearch(VectorStore): docs = self.similarity_search_by_vector_with_scores( embedding=_query, + embedder_name=embedder_name, k=k, filter=filter, kwargs=kwargs, @@ -196,6 +217,7 @@ class Meilisearch(VectorStore): def similarity_search_by_vector_with_scores( self, embedding: List[float], + embedder_name: Optional[str] = "default", k: int = 4, filter: Optional[Dict[str, Any]] = None, **kwargs: Any, @@ -204,6 +226,7 @@ class Meilisearch(VectorStore): Args: embedding (List[float]): Embedding to look up similar documents. + embedder_name: Name of the embedder to be used. Defaults to "default". k (int): Number of documents to return. Defaults to 4. filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. @@ -214,7 +237,13 @@ class Meilisearch(VectorStore): """ docs = [] results = self._client.index(str(self._index_name)).search( - "", {"vector": embedding, "limit": k, "filter": filter} + "", + { + "vector": embedding, + "hybrid": {"semanticRatio": 1.0, "embedder": embedder_name}, + "limit": k, + "filter": filter, + }, ) for result in results["hits"]: @@ -233,12 +262,14 @@ class Meilisearch(VectorStore): embedding: List[float], k: int = 4, filter: Optional[Dict[str, str]] = None, + embedder_name: Optional[str] = "default", **kwargs: Any, ) -> List[Document]: """Return meilisearch documents most similar to embedding vector. Args: embedding (List[float]): Embedding to look up similar documents. + embedder_name: Name of the embedder to be used. Defaults to "default". k (int): Number of documents to return. Defaults to 4. filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. 
@@ -249,6 +280,7 @@ class Meilisearch(VectorStore): """ docs = self.similarity_search_by_vector_with_scores( embedding=embedding, + embedder_name=embedder_name, k=k, filter=filter, kwargs=kwargs, @@ -268,6 +300,8 @@ class Meilisearch(VectorStore): ids: Optional[List[str]] = None, text_key: Optional[str] = "text", metadata_key: Optional[str] = "metadata", + embedders: Dict[str, Any] = {}, + embedder_name: Optional[str] = "default", **kwargs: Any, ) -> Meilisearch: """Construct Meilisearch wrapper from raw documents. @@ -288,21 +322,25 @@ class Meilisearch(VectorStore): # The environment should be the one specified next to the API key # in your Meilisearch console client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***') - embeddings = OpenAIEmbeddings() + embedding = OpenAIEmbeddings() + embedders: Embedders index setting. + embedder_name: Name of the embedder. Defaults to "default". docsearch = Meilisearch.from_texts( client=client, - embeddings=embeddings, + embedding=embedding, ) """ client = _create_client(client=client, url=url, api_key=api_key) vectorstore = cls( embedding=embedding, + embedders=embedders, client=client, index_name=index_name, ) vectorstore.add_texts( texts=texts, + embedder_name=embedder_name, metadatas=metadatas, ids=ids, text_key=text_key, diff --git a/libs/community/tests/integration_tests/vectorstores/test_meilisearch.py b/libs/community/tests/integration_tests/vectorstores/test_meilisearch.py index 1dd795f74ca..3b6695dcb40 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_meilisearch.py +++ b/libs/community/tests/integration_tests/vectorstores/test_meilisearch.py @@ -1,5 +1,6 @@ """Test Meilisearch functionality.""" -from typing import TYPE_CHECKING, Generator + +from typing import TYPE_CHECKING, Any, Dict, Generator import pytest import requests @@ -33,6 +34,16 @@ class TestMeilisearchVectorSearch: timeout=10, ) + @pytest.fixture + def new_embedders(self) -> Dict[str, Dict[str, Any]]: + return { + 
"default": { + "source": "userProvided", + # Dimension defined in FakeEmbeddings as [float(1.0)] * 9 + [float(0.0)] + "dimensions": 10, + } + } + @pytest.fixture(autouse=True) def setup(self) -> None: self.delete_all_indexes() @@ -63,12 +74,14 @@ class TestMeilisearchVectorSearch: # Wait for the last task to be completed client.wait_for_task(tasks.results[0].uid) - def test_meilisearch(self) -> None: + def test_meilisearch(self, new_embedders: Dict[str, Any]) -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] vectorstore = Meilisearch.from_texts( texts=texts, embedding=FakeEmbeddings(), + embedders=new_embedders, + embedder_name=list(new_embedders)[0], url=TEST_MEILI_HTTP_ADDR, api_key=TEST_MEILI_MASTER_KEY, index_name=INDEX_NAME, @@ -77,12 +90,14 @@ class TestMeilisearchVectorSearch: output = vectorstore.similarity_search("foo", k=1) assert output == [Document(page_content="foo")] - def test_meilisearch_with_client(self) -> None: + def test_meilisearch_with_client(self, new_embedders: Dict[str, Any]) -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] vectorstore = Meilisearch.from_texts( texts=texts, embedding=FakeEmbeddings(), + embedders=new_embedders, + embedder_name=list(new_embedders)[0], client=self.client(), index_name=INDEX_NAME, ) @@ -90,13 +105,15 @@ class TestMeilisearchVectorSearch: output = vectorstore.similarity_search("foo", k=1) assert output == [Document(page_content="foo")] - def test_meilisearch_with_metadatas(self) -> None: + def test_meilisearch_with_metadatas(self, new_embedders: Dict[str, Any]) -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = Meilisearch.from_texts( texts=texts, embedding=FakeEmbeddings(), + embedders=new_embedders, + embedder_name=list(new_embedders)[0], url=TEST_MEILI_HTTP_ADDR, api_key=TEST_MEILI_MASTER_KEY, index_name=INDEX_NAME, @@ -109,13 +126,17 @@ 
class TestMeilisearchVectorSearch: assert output[0].metadata["page"] == 0 assert output == [Document(page_content="foo", metadata={"page": 0})] - def test_meilisearch_with_metadatas_with_scores(self) -> None: + def test_meilisearch_with_metadatas_with_scores( + self, new_embedders: Dict[str, Any] + ) -> None: """Test end to end construction and scored search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": str(i)} for i in range(len(texts))] docsearch = Meilisearch.from_texts( texts=texts, embedding=FakeEmbeddings(), + embedders=new_embedders, + embedder_name=list(new_embedders)[0], url=TEST_MEILI_HTTP_ADDR, api_key=TEST_MEILI_MASTER_KEY, index_name=INDEX_NAME, @@ -123,9 +144,11 @@ class TestMeilisearchVectorSearch: ) self._wait_last_task() output = docsearch.similarity_search_with_score("foo", k=1) - assert output == [(Document(page_content="foo", metadata={"page": "0"}), 9.0)] + assert output == [(Document(page_content="foo", metadata={"page": "0"}), 1.0)] - def test_meilisearch_with_metadatas_with_scores_using_vector(self) -> None: + def test_meilisearch_with_metadatas_with_scores_using_vector( + self, new_embedders: Dict[str, Any] + ) -> None: """Test end to end construction and scored search, using embedding vector.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": str(i)} for i in range(len(texts))] @@ -134,6 +157,8 @@ class TestMeilisearchVectorSearch: docsearch = Meilisearch.from_texts( texts=texts, embedding=FakeEmbeddings(), + embedders=new_embedders, + embedder_name=list(new_embedders)[0], url=TEST_MEILI_HTTP_ADDR, api_key=TEST_MEILI_MASTER_KEY, index_name=INDEX_NAME, @@ -144,4 +169,4 @@ class TestMeilisearchVectorSearch: output = docsearch.similarity_search_by_vector_with_scores( embedding=embedded_query, k=1 ) - assert output == [(Document(page_content="foo", metadata={"page": "0"}), 9.0)] + assert output == [(Document(page_content="foo", metadata={"page": "0"}), 1.0)]