community[patch]: vectorstores import update (#21169)
Issue: we have several helper functions for importing third-party libraries, such as `lancedb.import_lancedb`, in [community.vectorstores](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.lancedb.import_lancedb.html#langchain_community.vectorstores.lancedb.import_lancedb), while `core.utils.utils.guard_import` already exists for exactly this purpose. The `import_<package>` functions behave inconsistently and would be better off as private functions.

Change: replaced these functions with `guard_import`.

Related to #21133
parent: 3003363605
commit: 500569da48
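For context, the pattern being swapped in looks roughly like the sketch below. This is a minimal illustration, not part of the diff; it assumes only what the diff itself shows, namely that `guard_import` takes a (possibly dotted) module path, returns the imported module object, and raises an `ImportError` with an install hint when the package is missing.

```python
from langchain_core.utils import guard_import

# The per-module helpers (dependable_annoy_import, import_lancedb, ...) all
# boiled down to "try to import X, else raise a helpful ImportError".
# guard_import centralizes that logic:
annoy = guard_import("annoy")                     # plain top-level package
tiledb_vs = guard_import("tiledb.vector_search")  # dotted submodule paths work too

# The return value is the module itself, so existing call sites keep working,
# e.g. building an index with annoy's own API:
index = annoy.AnnoyIndex(8, "angular")
```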
langchain_community/vectorstores/annoy.py
@@ -10,6 +10,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 import numpy as np
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 from langchain_community.docstore.base import Docstore
@@ -22,14 +23,7 @@ DEFAULT_METRIC = "angular"
 
 def dependable_annoy_import() -> Any:
     """Import annoy if available, otherwise raise error."""
-    try:
-        import annoy
-    except ImportError:
-        raise ImportError(
-            "Could not import annoy python package. "
-            "Please install it with `pip install --user annoy` "
-        )
-    return annoy
+    return guard_import("annoy")
 
 
 class Annoy(VectorStore):
@@ -300,7 +294,7 @@ class Annoy(VectorStore):
                     f"Expected one of {list(INDEX_METRICS)}"
                 )
             )
-        annoy = dependable_annoy_import()
+        annoy = guard_import("annoy")
         if not embeddings:
             raise ValueError("embeddings must be provided to build AnnoyIndex")
         f = len(embeddings[0])
@@ -459,7 +453,7 @@ class Annoy(VectorStore):
             )
         path = Path(folder_path)
         # load index separately since it is not picklable
-        annoy = dependable_annoy_import()
+        annoy = guard_import("annoy")
         # load docstore and index_to_docstore_id
         with open(path / "index.pkl", "rb") as file:
             docstore, index_to_docstore_id, config_object = pickle.load(file)

langchain_community/vectorstores/lancedb.py
@@ -7,20 +7,13 @@ from typing import Any, Iterable, List, Optional
 
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 
 def import_lancedb() -> Any:
     """Import lancedb package."""
-
-    try:
-        import lancedb
-    except ImportError as e:
-        raise ImportError(
-            "Could not import pinecone lancedb package. "
-            "Please install it with `pip install lancedb`."
-        ) from e
-    return lancedb
+    return guard_import("lancedb")
 
 
 class LanceDB(VectorStore):
@@ -64,7 +57,7 @@ class LanceDB(VectorStore):
         mode: Optional[str] = "overwrite",
     ):
         """Initialize with Lance DB vectorstore"""
-        lancedb = import_lancedb()
+        lancedb = guard_import("lancedb")
         self._embedding = embedding
         self._vector_key = vector_key
         self._id_key = id_key

langchain_community/vectorstores/scann.py
@@ -9,6 +9,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 import numpy as np
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 from langchain_community.docstore.base import AddableMixin, Docstore
@@ -26,14 +27,7 @@ def dependable_scann_import() -> Any:
     """
     Import `scann` if available, otherwise raise error.
     """
-    try:
-        import scann
-    except ImportError:
-        raise ImportError(
-            "Could not import scann python package. "
-            "Please install it with `pip install scann` "
-        )
-    return scann
+    return guard_import("scann")
 
 
 class ScaNN(VectorStore):
@@ -312,7 +306,7 @@ class ScaNN(VectorStore):
         normalize_L2: bool = False,
         **kwargs: Any,
     ) -> ScaNN:
-        scann = dependable_scann_import()
+        scann = guard_import("scann")
         distance_strategy = kwargs.get(
             "distance_strategy", DistanceStrategy.EUCLIDEAN_DISTANCE
         )
@@ -494,7 +488,7 @@ class ScaNN(VectorStore):
         scann_path = path / "{index_name}.scann".format(index_name=index_name)
         scann_path.mkdir(exist_ok=True, parents=True)
         # load index separately since it is not picklable
-        scann = dependable_scann_import()
+        scann = guard_import("scann")
         index = scann.scann_ops_pybind.load_searcher(str(scann_path))
 
         # load docstore and index_to_docstore_id

langchain_community/vectorstores/tiledb.py
@@ -1,4 +1,5 @@
 """Wrapper around TileDB vector database."""
+
 from __future__ import annotations
 
 import pickle
@@ -9,6 +10,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
 import numpy as np
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 from langchain_community.vectorstores.utils import maximal_marginal_relevance
@@ -24,16 +26,10 @@ MAX_FLOAT = sys.float_info.max
 
 def dependable_tiledb_import() -> Any:
     """Import tiledb-vector-search if available, otherwise raise error."""
-    try:
-        import tiledb as tiledb
-        import tiledb.vector_search as tiledb_vs
-    except ImportError:
-        raise ImportError(
-            "Could not import tiledb-vector-search python package. "
-            "Please install it with `conda install -c tiledb tiledb-vector-search` "
-            "or `pip install tiledb-vector-search`"
-        )
-    return tiledb_vs, tiledb
+    return (
+        guard_import("tiledb.vector_search"),
+        guard_import("tiledb"),
+    )
 
 
 def get_vector_index_uri_from_group(group: Any) -> str:
@@ -115,7 +111,10 @@ class TileDB(VectorStore):
         self.metric = metric
         self.config = config
 
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb_vs, tiledb = (
+            guard_import("tiledb.vector_search"),
+            guard_import("tiledb"),
+        )
         with tiledb.scope_ctx(ctx_or_config=config):
             index_group = tiledb.Group(self.index_uri, "r")
             self.vector_index_uri = (
@@ -173,7 +172,7 @@ class TileDB(VectorStore):
         Returns:
             List of Documents and scores.
         """
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb = guard_import("tiledb")
         docs = []
         docs_array = tiledb.open(
             self.docs_array_uri, "r", timestamp=self.timestamp, config=self.config
@@ -477,7 +476,10 @@ class TileDB(VectorStore):
         metadatas: bool = True,
         config: Optional[Mapping[str, Any]] = None,
     ) -> None:
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb_vs, tiledb = (
+            guard_import("tiledb.vector_search"),
+            guard_import("tiledb"),
+        )
         with tiledb.scope_ctx(ctx_or_config=config):
             try:
                 tiledb.group_create(index_uri)
@@ -550,7 +552,10 @@ class TileDB(VectorStore):
                     f"Expected one of {list(INDEX_METRICS)}"
                 )
             )
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb_vs, tiledb = (
+            guard_import("tiledb.vector_search"),
+            guard_import("tiledb"),
+        )
         input_vectors = np.array(embeddings).astype(np.float32)
         cls.create(
             index_uri=index_uri,
@@ -646,7 +651,7 @@ class TileDB(VectorStore):
         Returns:
             List of ids from adding the texts into the vectorstore.
         """
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb = guard_import("tiledb")
         embeddings = self.embedding.embed_documents(list(texts))
         if ids is None:
             ids = [str(random.randint(0, MAX_UINT64 - 1)) for _ in texts]

langchain_community/vectorstores/usearch.py
@@ -5,6 +5,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
 import numpy as np
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 from langchain_community.docstore.base import AddableMixin, Docstore
@@ -15,14 +16,7 @@ def dependable_usearch_import() -> Any:
     """
     Import usearch if available, otherwise raise error.
    """
-    try:
-        import usearch.index
-    except ImportError:
-        raise ImportError(
-            "Could not import usearch python package. "
-            "Please install it with `pip install usearch` "
-        )
-    return usearch.index
+    return guard_import("usearch.index")
 
 
 class USearch(VectorStore):
@@ -170,7 +164,7 @@ class USearch(VectorStore):
            documents.append(Document(page_content=text, metadata=metadata))
 
         docstore = InMemoryDocstore(dict(zip(ids, documents)))
-        usearch = dependable_usearch_import()
+        usearch = guard_import("usearch.index")
         index = usearch.Index(ndim=len(embeddings[0]), metric=metric)
         index.add(np.array(ids), np.array(embeddings))
         return cls(embedding, index, docstore, ids.tolist())