mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-01 10:54:15 +00:00
community[patch]: activeloop ai tql deprecation (#14634)
Co-authored-by: AdkSarsen <adilkhan@activeloop.ai>
This commit is contained in:
parent
c95facc293
commit
8457c31c04
@ -51,6 +51,7 @@ class DeepLake(VectorStore):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_LANGCHAIN_DEFAULT_DEEPLAKE_PATH = "./deeplake/"
|
_LANGCHAIN_DEFAULT_DEEPLAKE_PATH = "./deeplake/"
|
||||||
|
_valid_search_kwargs = ["lambda_mult"]
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -219,11 +220,7 @@ class DeepLake(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List[str]: List of IDs of the added texts.
|
List[str]: List of IDs of the added texts.
|
||||||
"""
|
"""
|
||||||
if kwargs:
|
self._validate_kwargs(kwargs, "add_texts")
|
||||||
unsupported_items = "`, `".join(set(kwargs.keys()))
|
|
||||||
raise TypeError(
|
|
||||||
f"`{unsupported_items}` is/are not a valid argument to add_text method"
|
|
||||||
)
|
|
||||||
|
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
if ids:
|
if ids:
|
||||||
@ -371,6 +368,9 @@ class DeepLake(VectorStore):
|
|||||||
Raises:
|
Raises:
|
||||||
ValueError: if neither `embedding` nor `embedding_function` is specified.
|
ValueError: if neither `embedding` nor `embedding_function` is specified.
|
||||||
"""
|
"""
|
||||||
|
if kwargs.get("tql_query"):
|
||||||
|
logger.warning("`tql_query` is deprecated. Please use `tql` instead.")
|
||||||
|
kwargs["tql"] = kwargs.pop("tql_query")
|
||||||
|
|
||||||
if kwargs.get("tql"):
|
if kwargs.get("tql"):
|
||||||
return self._search_tql(
|
return self._search_tql(
|
||||||
@ -384,6 +384,8 @@ class DeepLake(VectorStore):
|
|||||||
filter=filter,
|
filter=filter,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self._validate_kwargs(kwargs, "search")
|
||||||
|
|
||||||
if embedding_function:
|
if embedding_function:
|
||||||
if isinstance(embedding_function, Embeddings):
|
if isinstance(embedding_function, Embeddings):
|
||||||
_embedding_function = embedding_function.embed_query
|
_embedding_function = embedding_function.embed_query
|
||||||
@ -417,7 +419,6 @@ class DeepLake(VectorStore):
|
|||||||
return_tensors=["embedding", "metadata", "text", self._id_tensor_name],
|
return_tensors=["embedding", "metadata", "text", self._id_tensor_name],
|
||||||
deep_memory=deep_memory,
|
deep_memory=deep_memory,
|
||||||
)
|
)
|
||||||
|
|
||||||
scores = result["score"]
|
scores = result["score"]
|
||||||
embeddings = result["embedding"]
|
embeddings = result["embedding"]
|
||||||
metadatas = result["metadata"]
|
metadatas = result["metadata"]
|
||||||
@ -445,6 +446,9 @@ class DeepLake(VectorStore):
|
|||||||
]
|
]
|
||||||
|
|
||||||
if return_score:
|
if return_score:
|
||||||
|
if not isinstance(scores, list):
|
||||||
|
scores = [scores]
|
||||||
|
|
||||||
return [(doc, score) for doc, score in zip(docs, scores)]
|
return [(doc, score) for doc, score in zip(docs, scores)]
|
||||||
|
|
||||||
return docs
|
return docs
|
||||||
@ -899,3 +903,30 @@ class DeepLake(VectorStore):
|
|||||||
"better to use `db.vectorstore.dataset` instead."
|
"better to use `db.vectorstore.dataset` instead."
|
||||||
)
|
)
|
||||||
return self.vectorstore.dataset
|
return self.vectorstore.dataset
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _validate_kwargs(cls, kwargs, method_name):
|
||||||
|
if kwargs:
|
||||||
|
valid_items = cls._get_valid_args(method_name)
|
||||||
|
unsupported_items = cls._get_unsupported_items(kwargs, valid_items)
|
||||||
|
|
||||||
|
if unsupported_items:
|
||||||
|
raise TypeError(
|
||||||
|
f"`{unsupported_items}` are not a valid "
|
||||||
|
f"argument to {method_name} method"
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _get_valid_args(cls, method_name):
|
||||||
|
if method_name == "search":
|
||||||
|
return cls._valid_search_kwargs
|
||||||
|
else:
|
||||||
|
return []
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_unsupported_items(kwargs, valid_items):
|
||||||
|
kwargs = {k: v for k, v in kwargs.items() if k not in valid_items}
|
||||||
|
unsupported_items = None
|
||||||
|
if kwargs:
|
||||||
|
unsupported_items = "`, `".join(set(kwargs.keys()))
|
||||||
|
return unsupported_items
|
||||||
|
@ -18,7 +18,9 @@ def deeplake_datastore() -> DeepLake:
|
|||||||
embedding_function=FakeEmbeddings(),
|
embedding_function=FakeEmbeddings(),
|
||||||
overwrite=True,
|
overwrite=True,
|
||||||
)
|
)
|
||||||
return docsearch
|
yield docsearch
|
||||||
|
|
||||||
|
docsearch.delete_dataset()
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(params=["L1", "L2", "max", "cos"])
|
@pytest.fixture(params=["L1", "L2", "max", "cos"])
|
||||||
@ -50,27 +52,14 @@ def test_deeplake_with_metadatas() -> None:
|
|||||||
assert output == [Document(page_content="foo", metadata={"page": "0"})]
|
assert output == [Document(page_content="foo", metadata={"page": "0"})]
|
||||||
|
|
||||||
|
|
||||||
def test_deeplakewith_persistence() -> None:
|
def test_deeplake_with_persistence(deeplake_datastore) -> None:
|
||||||
"""Test end to end construction and search, with persistence."""
|
"""Test end to end construction and search, with persistence."""
|
||||||
import deeplake
|
output = deeplake_datastore.similarity_search("foo", k=1)
|
||||||
|
assert output == [Document(page_content="foo", metadata={"page": "0"})]
|
||||||
dataset_path = "./tests/persist_dir"
|
|
||||||
if deeplake.exists(dataset_path):
|
|
||||||
deeplake.delete(dataset_path)
|
|
||||||
|
|
||||||
texts = ["foo", "bar", "baz"]
|
|
||||||
docsearch = DeepLake.from_texts(
|
|
||||||
dataset_path=dataset_path,
|
|
||||||
texts=texts,
|
|
||||||
embedding=FakeEmbeddings(),
|
|
||||||
)
|
|
||||||
|
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
|
||||||
assert output == [Document(page_content="foo")]
|
|
||||||
|
|
||||||
# Get a new VectorStore from the persisted directory
|
# Get a new VectorStore from the persisted directory
|
||||||
docsearch = DeepLake(
|
docsearch = DeepLake(
|
||||||
dataset_path=dataset_path,
|
dataset_path=deeplake_datastore.vectorstore.dataset_handler.path,
|
||||||
embedding_function=FakeEmbeddings(),
|
embedding_function=FakeEmbeddings(),
|
||||||
)
|
)
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
@ -83,22 +72,12 @@ def test_deeplakewith_persistence() -> None:
|
|||||||
# Or on program exit
|
# Or on program exit
|
||||||
|
|
||||||
|
|
||||||
def test_deeplake_overwrite_flag() -> None:
|
def test_deeplake_overwrite_flag(deeplake_datastore) -> None:
|
||||||
"""Test overwrite behavior"""
|
"""Test overwrite behavior"""
|
||||||
import deeplake
|
dataset_path = deeplake_datastore.vectorstore.dataset_handler.path
|
||||||
|
|
||||||
dataset_path = "./tests/persist_dir"
|
output = deeplake_datastore.similarity_search("foo", k=1)
|
||||||
if deeplake.exists(dataset_path):
|
assert output == [Document(page_content="foo", metadata={"page": "0"})]
|
||||||
deeplake.delete(dataset_path)
|
|
||||||
|
|
||||||
texts = ["foo", "bar", "baz"]
|
|
||||||
docsearch = DeepLake.from_texts(
|
|
||||||
dataset_path=dataset_path,
|
|
||||||
texts=texts,
|
|
||||||
embedding=FakeEmbeddings(),
|
|
||||||
)
|
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
|
||||||
assert output == [Document(page_content="foo")]
|
|
||||||
|
|
||||||
# Get a new VectorStore from the persisted directory, with no overwrite (implicit)
|
# Get a new VectorStore from the persisted directory, with no overwrite (implicit)
|
||||||
docsearch = DeepLake(
|
docsearch = DeepLake(
|
||||||
@ -107,7 +86,7 @@ def test_deeplake_overwrite_flag() -> None:
|
|||||||
)
|
)
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
# assert page still present
|
# assert page still present
|
||||||
assert output == [Document(page_content="foo")]
|
assert output == [Document(page_content="foo", metadata={"page": "0"})]
|
||||||
|
|
||||||
# Get a new VectorStore from the persisted directory, with no overwrite (explicit)
|
# Get a new VectorStore from the persisted directory, with no overwrite (explicit)
|
||||||
docsearch = DeepLake(
|
docsearch = DeepLake(
|
||||||
@ -117,7 +96,7 @@ def test_deeplake_overwrite_flag() -> None:
|
|||||||
)
|
)
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
# assert page still present
|
# assert page still present
|
||||||
assert output == [Document(page_content="foo")]
|
assert output == [Document(page_content="foo", metadata={"page": "0"})]
|
||||||
|
|
||||||
# Get a new VectorStore from the persisted directory, with overwrite
|
# Get a new VectorStore from the persisted directory, with overwrite
|
||||||
docsearch = DeepLake(
|
docsearch = DeepLake(
|
||||||
@ -129,8 +108,9 @@ def test_deeplake_overwrite_flag() -> None:
|
|||||||
output = docsearch.similarity_search("foo", k=1)
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
|
|
||||||
|
|
||||||
def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -> None:
|
def test_similarity_search(deeplake_datastore) -> None:
|
||||||
"""Test similarity search."""
|
"""Test similarity search."""
|
||||||
|
distance_metric = "cos"
|
||||||
output = deeplake_datastore.similarity_search(
|
output = deeplake_datastore.similarity_search(
|
||||||
"foo", k=1, distance_metric=distance_metric
|
"foo", k=1, distance_metric=distance_metric
|
||||||
)
|
)
|
||||||
@ -145,7 +125,6 @@ def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -
|
|||||||
query="foo", tql_query=tql_query, k=1, distance_metric=distance_metric
|
query="foo", tql_query=tql_query, k=1, distance_metric=distance_metric
|
||||||
)
|
)
|
||||||
assert len(output) == 1
|
assert len(output) == 1
|
||||||
deeplake_datastore.delete_dataset()
|
|
||||||
|
|
||||||
|
|
||||||
def test_similarity_search_by_vector(
|
def test_similarity_search_by_vector(
|
||||||
@ -164,6 +143,7 @@ def test_similarity_search_with_score(
|
|||||||
deeplake_datastore: DeepLake, distance_metric: str
|
deeplake_datastore: DeepLake, distance_metric: str
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test similarity search with score."""
|
"""Test similarity search with score."""
|
||||||
|
deeplake_datastore.vectorstore.summary()
|
||||||
output, score = deeplake_datastore.similarity_search_with_score(
|
output, score = deeplake_datastore.similarity_search_with_score(
|
||||||
"foo", k=1, distance_metric=distance_metric
|
"foo", k=1, distance_metric=distance_metric
|
||||||
)[0]
|
)[0]
|
||||||
@ -281,3 +261,11 @@ def test_ids_backwards_compatibility() -> None:
|
|||||||
)
|
)
|
||||||
output = db.similarity_search("foo", k=1)
|
output = db.similarity_search("foo", k=1)
|
||||||
assert len(output) == 1
|
assert len(output) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_similarity_search_should_error_out_when_not_supported_kwargs_are_provided(
|
||||||
|
deeplake_datastore: DeepLake,
|
||||||
|
) -> None:
|
||||||
|
"""Test that similarity_search raises TypeError for unsupported kwargs."""
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
deeplake_datastore.similarity_search("foo", k=1, not_supported_kwarg=True)
|
||||||
|
Loading…
Reference in New Issue
Block a user