From 10418ab0c12ab3ba048ce524cc1947619a12274f Mon Sep 17 00:00:00 2001 From: Adilkhan Sarsen <54854336+adolkhan@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:46:01 +0600 Subject: [PATCH] DeepLake Backwards compatibility fix (#13388) - **Description:** Some users hit backwards-compatibility errors when running a search against older DeepLake datasets (for example, ones whose id tensor is named `ids` rather than `id`). This PR fixes that by passing `self._id_tensor_name` in `return_tensors` instead of the hard-coded `"id"`, so search works on older datasets as well. --------- Co-authored-by: adolkhan --- .../langchain/vectorstores/deeplake.py | 2 +- .../vectorstores/test_deeplake.py | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/vectorstores/deeplake.py b/libs/langchain/langchain/vectorstores/deeplake.py index b6223772ab0..f8fa6a8fc89 100644 --- a/libs/langchain/langchain/vectorstores/deeplake.py +++ b/libs/langchain/langchain/vectorstores/deeplake.py @@ -413,7 +413,7 @@ class DeepLake(VectorStore): distance_metric=distance_metric, filter=filter, exec_option=exec_option, - return_tensors=["embedding", "metadata", "text", "id"], + return_tensors=["embedding", "metadata", "text", self._id_tensor_name], deep_memory=deep_memory, ) diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_deeplake.py b/libs/langchain/tests/integration_tests/vectorstores/test_deeplake.py index a666329cce8..5e943d5cf4e 100644 --- a/libs/langchain/tests/integration_tests/vectorstores/test_deeplake.py +++ b/libs/langchain/tests/integration_tests/vectorstores/test_deeplake.py @@ -259,3 +259,25 @@ def test_add_texts(deeplake_datastore: DeepLake) -> None: texts=texts, metada=metadatas, ) + + +def test_ids_backwards_compatibility() -> None: + """Test that ids are backwards compatible.""" + db = DeepLake( + dataset_path="mem://test_path", + embedding_function=FakeEmbeddings(), + tensor_params=[ + {"name": "ids", "htype": "text"}, + {"name": "text", "htype": "text"}, + {"name": "embedding", "htype": "embedding"}, + {"name": "metadata", "htype": "json"}, + ], + ) + 
db.vectorstore.add( + ids=["1", "2", "3"], + text=["foo", "bar", "baz"], + embedding=FakeEmbeddings().embed_documents(["foo", "bar", "baz"]), + metadata=[{"page": str(i)} for i in range(3)], + ) + output = db.similarity_search("foo", k=1) + assert len(output) == 1