From 500569da4889d1cfe9266cad66dc83c83c54dbcc Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Mon, 13 May 2024 07:45:31 -0700 Subject: [PATCH] community[patch]: `vectorstores` import update (#21169) Issue: we have several helper functions to import third-party libraries like lancedb.import_lancedb in [community.vectorstores](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.lancedb.import_lancedb.html#langchain_community.vectorstores.lancedb.import_lancedb). We also have core.utils.utils.guard_import, which exists exactly for this purpose. The import_ functions work inconsistently and should rather be private functions. Change: replaced these functions with the guard_import function. Related to #21133 --- .../langchain_community/vectorstores/annoy.py | 14 +++----- .../vectorstores/lancedb.py | 13 ++----- .../langchain_community/vectorstores/scann.py | 14 +++----- .../vectorstores/tiledb.py | 35 +++++++++++-------- .../vectorstores/usearch.py | 12 ++----- 5 files changed, 34 insertions(+), 54 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/annoy.py b/libs/community/langchain_community/vectorstores/annoy.py index d35def219e2..666b8cf4550 100644 --- a/libs/community/langchain_community/vectorstores/annoy.py +++ b/libs/community/langchain_community/vectorstores/annoy.py @@ -10,6 +10,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple import numpy as np from langchain_core.documents import Document from langchain_core.embeddings import Embeddings +from langchain_core.utils import guard_import from langchain_core.vectorstores import VectorStore from langchain_community.docstore.base import Docstore @@ -22,14 +23,7 @@ DEFAULT_METRIC = "angular" def dependable_annoy_import() -> Any: """Import annoy if available, otherwise raise error.""" - try: - import annoy - except ImportError: - raise ImportError( - "Could not import annoy python package. 
" - "Please install it with `pip install --user annoy` " - ) - return annoy + return guard_import("annoy") class Annoy(VectorStore): @@ -300,7 +294,7 @@ class Annoy(VectorStore): f"Expected one of {list(INDEX_METRICS)}" ) ) - annoy = dependable_annoy_import() + annoy = guard_import("annoy") if not embeddings: raise ValueError("embeddings must be provided to build AnnoyIndex") f = len(embeddings[0]) @@ -459,7 +453,7 @@ class Annoy(VectorStore): ) path = Path(folder_path) # load index separately since it is not picklable - annoy = dependable_annoy_import() + annoy = guard_import("annoy") # load docstore and index_to_docstore_id with open(path / "index.pkl", "rb") as file: docstore, index_to_docstore_id, config_object = pickle.load(file) diff --git a/libs/community/langchain_community/vectorstores/lancedb.py b/libs/community/langchain_community/vectorstores/lancedb.py index ea86cec20a1..bb28165be24 100644 --- a/libs/community/langchain_community/vectorstores/lancedb.py +++ b/libs/community/langchain_community/vectorstores/lancedb.py @@ -7,20 +7,13 @@ from typing import Any, Iterable, List, Optional from langchain_core.documents import Document from langchain_core.embeddings import Embeddings +from langchain_core.utils import guard_import from langchain_core.vectorstores import VectorStore def import_lancedb() -> Any: """Import lancedb package.""" - - try: - import lancedb - except ImportError as e: - raise ImportError( - "Could not import pinecone lancedb package. " - "Please install it with `pip install lancedb`." 
- ) from e - return lancedb + return guard_import("lancedb") class LanceDB(VectorStore): @@ -64,7 +57,7 @@ class LanceDB(VectorStore): mode: Optional[str] = "overwrite", ): """Initialize with Lance DB vectorstore""" - lancedb = import_lancedb() + lancedb = guard_import("lancedb") self._embedding = embedding self._vector_key = vector_key self._id_key = id_key diff --git a/libs/community/langchain_community/vectorstores/scann.py b/libs/community/langchain_community/vectorstores/scann.py index 67fc46096e2..11e7bf47041 100644 --- a/libs/community/langchain_community/vectorstores/scann.py +++ b/libs/community/langchain_community/vectorstores/scann.py @@ -9,6 +9,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple import numpy as np from langchain_core.documents import Document from langchain_core.embeddings import Embeddings +from langchain_core.utils import guard_import from langchain_core.vectorstores import VectorStore from langchain_community.docstore.base import AddableMixin, Docstore @@ -26,14 +27,7 @@ def dependable_scann_import() -> Any: """ Import `scann` if available, otherwise raise error. """ - try: - import scann - except ImportError: - raise ImportError( - "Could not import scann python package. 
" - "Please install it with `pip install scann` " - ) - return scann + return guard_import("scann") class ScaNN(VectorStore): @@ -312,7 +306,7 @@ class ScaNN(VectorStore): normalize_L2: bool = False, **kwargs: Any, ) -> ScaNN: - scann = dependable_scann_import() + scann = guard_import("scann") distance_strategy = kwargs.get( "distance_strategy", DistanceStrategy.EUCLIDEAN_DISTANCE ) @@ -494,7 +488,7 @@ class ScaNN(VectorStore): scann_path = path / "{index_name}.scann".format(index_name=index_name) scann_path.mkdir(exist_ok=True, parents=True) # load index separately since it is not picklable - scann = dependable_scann_import() + scann = guard_import("scann") index = scann.scann_ops_pybind.load_searcher(str(scann_path)) # load docstore and index_to_docstore_id diff --git a/libs/community/langchain_community/vectorstores/tiledb.py b/libs/community/langchain_community/vectorstores/tiledb.py index 4983488fba2..d8c3c640871 100644 --- a/libs/community/langchain_community/vectorstores/tiledb.py +++ b/libs/community/langchain_community/vectorstores/tiledb.py @@ -1,4 +1,5 @@ """Wrapper around TileDB vector database.""" + from __future__ import annotations import pickle @@ -9,6 +10,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple import numpy as np from langchain_core.documents import Document from langchain_core.embeddings import Embeddings +from langchain_core.utils import guard_import from langchain_core.vectorstores import VectorStore from langchain_community.vectorstores.utils import maximal_marginal_relevance @@ -24,16 +26,10 @@ MAX_FLOAT = sys.float_info.max def dependable_tiledb_import() -> Any: """Import tiledb-vector-search if available, otherwise raise error.""" - try: - import tiledb as tiledb - import tiledb.vector_search as tiledb_vs - except ImportError: - raise ImportError( - "Could not import tiledb-vector-search python package. 
" - "Please install it with `conda install -c tiledb tiledb-vector-search` " - "or `pip install tiledb-vector-search`" - ) - return tiledb_vs, tiledb + return ( + guard_import("tiledb.vector_search"), + guard_import("tiledb"), + ) def get_vector_index_uri_from_group(group: Any) -> str: @@ -115,7 +111,10 @@ class TileDB(VectorStore): self.metric = metric self.config = config - tiledb_vs, tiledb = dependable_tiledb_import() + tiledb_vs, tiledb = ( + guard_import("tiledb.vector_search"), + guard_import("tiledb"), + ) with tiledb.scope_ctx(ctx_or_config=config): index_group = tiledb.Group(self.index_uri, "r") self.vector_index_uri = ( @@ -173,7 +172,7 @@ class TileDB(VectorStore): Returns: List of Documents and scores. """ - tiledb_vs, tiledb = dependable_tiledb_import() + tiledb = guard_import("tiledb") docs = [] docs_array = tiledb.open( self.docs_array_uri, "r", timestamp=self.timestamp, config=self.config @@ -477,7 +476,10 @@ class TileDB(VectorStore): metadatas: bool = True, config: Optional[Mapping[str, Any]] = None, ) -> None: - tiledb_vs, tiledb = dependable_tiledb_import() + tiledb_vs, tiledb = ( + guard_import("tiledb.vector_search"), + guard_import("tiledb"), + ) with tiledb.scope_ctx(ctx_or_config=config): try: tiledb.group_create(index_uri) @@ -550,7 +552,10 @@ class TileDB(VectorStore): f"Expected one of {list(INDEX_METRICS)}" ) ) - tiledb_vs, tiledb = dependable_tiledb_import() + tiledb_vs, tiledb = ( + guard_import("tiledb.vector_search"), + guard_import("tiledb"), + ) input_vectors = np.array(embeddings).astype(np.float32) cls.create( index_uri=index_uri, @@ -646,7 +651,7 @@ class TileDB(VectorStore): Returns: List of ids from adding the texts into the vectorstore. 
""" - tiledb_vs, tiledb = dependable_tiledb_import() + tiledb = guard_import("tiledb") embeddings = self.embedding.embed_documents(list(texts)) if ids is None: ids = [str(random.randint(0, MAX_UINT64 - 1)) for _ in texts] diff --git a/libs/community/langchain_community/vectorstores/usearch.py b/libs/community/langchain_community/vectorstores/usearch.py index 6ca66e630c8..fa94d19de00 100644 --- a/libs/community/langchain_community/vectorstores/usearch.py +++ b/libs/community/langchain_community/vectorstores/usearch.py @@ -5,6 +5,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple import numpy as np from langchain_core.documents import Document from langchain_core.embeddings import Embeddings +from langchain_core.utils import guard_import from langchain_core.vectorstores import VectorStore from langchain_community.docstore.base import AddableMixin, Docstore @@ -15,14 +16,7 @@ def dependable_usearch_import() -> Any: """ Import usearch if available, otherwise raise error. """ - try: - import usearch.index - except ImportError: - raise ImportError( - "Could not import usearch python package. " - "Please install it with `pip install usearch` " - ) - return usearch.index + return guard_import("usearch.index") class USearch(VectorStore): @@ -170,7 +164,7 @@ class USearch(VectorStore): documents.append(Document(page_content=text, metadata=metadata)) docstore = InMemoryDocstore(dict(zip(ids, documents))) - usearch = dependable_usearch_import() + usearch = guard_import("usearch.index") index = usearch.Index(ndim=len(embeddings[0]), metric=metric) index.add(np.array(ids), np.array(embeddings)) return cls(embedding, index, docstore, ids.tolist())