From 973866c89405dc19c520b7608408c1bdfaa76b16 Mon Sep 17 00:00:00 2001 From: Raynor Chavez Date: Tue, 22 Aug 2023 01:43:15 +0800 Subject: [PATCH] fix: Updated marqo integration for marqo version 1.0.0+ (#9521) - Description: Updated marqo integration to use tensor_fields instead of non_tensor_fields. Upgraded marqo version to 1.2.4 - Dependencies: marqo 1.2.4 --------- Co-authored-by: Raynor Kirkson E. Chavez Co-authored-by: Bagatur --- .../langchain/langchain/vectorstores/marqo.py | 20 +++++++++---------- libs/langchain/poetry.lock | 12 ++++++----- libs/langchain/pyproject.toml | 2 +- .../vectorstores/test_marqo.py | 1 + 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/libs/langchain/langchain/vectorstores/marqo.py b/libs/langchain/langchain/vectorstores/marqo.py index e3873e4e15b..0d1dfe0489e 100644 --- a/libs/langchain/langchain/vectorstores/marqo.py +++ b/libs/langchain/langchain/vectorstores/marqo.py @@ -78,7 +78,7 @@ class Marqo(VectorStore): self._searchable_attributes = searchable_attributes self.page_content_builder = page_content_builder - self._non_tensor_fields = ["metadata"] + self.tensor_fields = ["text"] self._document_batch_size = 1024 @@ -132,7 +132,7 @@ class Marqo(VectorStore): for i in range(0, num_docs, self._document_batch_size): response = self._client.index(self._index_name).add_documents( documents[i : i + self._document_batch_size], - non_tensor_fields=self._non_tensor_fields, + tensor_fields=self.tensor_fields, **self._add_documents_settings, ) if response["errors"]: @@ -330,17 +330,15 @@ class Marqo(VectorStore): Dict[str, Dict[List[Dict[str, Dict[str, Any]]]]]: A bulk search results object """ - bulk_results = self._client.bulk_search( - [ - { - "index": self._index_name, - "q": query, - "searchableAttributes": self._searchable_attributes, - "limit": k, - } + bulk_results = { + "result": [ + self._client.index(self._index_name).search( + q=query, searchable_attributes=self._searchable_attributes, limit=k + ) for query in queries ] - ) + } + return bulk_results @classmethod diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index 9305a0838f8..a0a76d09799 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -4407,19 +4407,21 @@ files = [ [[package]] name = "marqo" -version = "0.11.0" +version = "1.2.4" description = "Tensor search for humans" category = "main" optional = true python-versions = ">=3" files = [ - {file = "marqo-0.11.0-py3-none-any.whl", hash = "sha256:e1a5409beeb02dcec725566cfbc5fd88a84ce65ca7bce08a1120f8082badeab4"}, - {file = "marqo-0.11.0.tar.gz", hash = "sha256:808e691cf06f5f7d67d422dc7f5f6fcc53b9acc6a4bc000abbcae8a817fd765d"}, + {file = "marqo-1.2.4-py3-none-any.whl", hash = "sha256:aaf59ca35214febaa893e102828a50ab9e53fe57201cd43714ab7c0515166068"}, + {file = "marqo-1.2.4.tar.gz", hash = "sha256:3fe0eb8e1ed73883fd8e6001582d18dab6e149d79e41b92a1403b2ff52d18c43"}, ] [package.dependencies] -pydantic = "*" +packaging = "*" +pydantic = "<2.0.0" requests = "*" +typing-extensions = ">=4.5.0" urllib3 = "*" [[package]] @@ -10487,4 +10489,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "a5e3458dd0cabcefd83caec6eb33b6fb593c2c347ca1d33c1f182341e852a9c8" +content-hash = "0247674f3f274fd2249ceb02c23a468f911a7c482796ea67252b203d1ab938ae" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index d74fda8f2f0..8c3c8c18df2 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -37,7 +37,7 @@ pinecone-text = {version = "^0.4.2", optional = true} pymongo = {version = "^4.3.3", optional = true} clickhouse-connect = {version="^0.5.14", optional=true} weaviate-client = {version = "^3", optional = true} -marqo = {version = "^0.11.0", optional=true} +marqo = {version = "^1.2.4", optional=true} google-api-python-client = {version = "2.70.0", optional = true} google-auth = {version = "^2.18.1", optional = true} wolframalpha = {version = "5.0.0", optional = true} diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_marqo.py b/libs/langchain/tests/integration_tests/vectorstores/test_marqo.py index 4821ee2dc2a..94549735a67 100644 --- a/libs/langchain/tests/integration_tests/vectorstores/test_marqo.py +++ b/libs/langchain/tests/integration_tests/vectorstores/test_marqo.py @@ -158,6 +158,7 @@ def test_marqo_multimodal() -> None: "mainline/examples/ImageSearchGuide/data/image2.jpg", }, ], + tensor_fields=["caption", "image"], ) def get_content(res: Dict[str, str]) -> str: