community[patch]: vectorstores import update (#21169)
Issue: `langchain_community.vectorstores` contains several ad-hoc helpers for importing third-party libraries, such as [lancedb.import_lancedb](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.lancedb.import_lancedb.html#langchain_community.vectorstores.lancedb.import_lancedb), even though `core.utils.utils.guard_import` exists for exactly this purpose. The `import_<package>` functions behave inconsistently and should really be private.

Change: replaced these helpers with `guard_import`.

Related to #21133
This commit is contained in:
parent 3003363605
commit 500569da48
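
For context: `guard_import` takes a module name, imports it, and raises an `ImportError` with an install hint when the package is missing, which is exactly what each per-vectorstore helper was hand-rolling. A minimal sketch of the new call-site pattern (the `pip_name` keyword in the last line is an assumption about `guard_import`'s signature, not something this commit relies on):

    from langchain_core.utils import guard_import

    # Returns the imported module, or raises an ImportError with a
    # "Please install it with `pip install ...`" hint if it is missing.
    annoy = guard_import("annoy")

    # A dotted name imports and returns that submodule, as the
    # usearch and tiledb call sites below rely on.
    usearch_index = guard_import("usearch.index")

    # Assumed keyword: when the PyPI distribution name differs from the module
    # name, pip_name lets the error message suggest the right install command.
    tiledb_vs = guard_import("tiledb.vector_search", pip_name="tiledb-vector-search")
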
langchain_community/vectorstores/annoy.py

@@ -10,6 +10,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 import numpy as np
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 from langchain_community.docstore.base import Docstore
@@ -22,14 +23,7 @@ DEFAULT_METRIC = "angular"
 
 def dependable_annoy_import() -> Any:
     """Import annoy if available, otherwise raise error."""
-    try:
-        import annoy
-    except ImportError:
-        raise ImportError(
-            "Could not import annoy python package. "
-            "Please install it with `pip install --user annoy` "
-        )
-    return annoy
+    return guard_import("annoy")
 
 
 class Annoy(VectorStore):
@@ -300,7 +294,7 @@ class Annoy(VectorStore):
                     f"Expected one of {list(INDEX_METRICS)}"
                 )
             )
-        annoy = dependable_annoy_import()
+        annoy = guard_import("annoy")
         if not embeddings:
             raise ValueError("embeddings must be provided to build AnnoyIndex")
         f = len(embeddings[0])
@@ -459,7 +453,7 @@ class Annoy(VectorStore):
         )
         path = Path(folder_path)
         # load index separately since it is not picklable
-        annoy = dependable_annoy_import()
+        annoy = guard_import("annoy")
         # load docstore and index_to_docstore_id
         with open(path / "index.pkl", "rb") as file:
             docstore, index_to_docstore_id, config_object = pickle.load(file)
langchain_community/vectorstores/lancedb.py

@@ -7,20 +7,13 @@ from typing import Any, Iterable, List, Optional
 
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 
 def import_lancedb() -> Any:
     """Import lancedb package."""
-    try:
-        import lancedb
-    except ImportError as e:
-        raise ImportError(
-            "Could not import pinecone lancedb package. "
-            "Please install it with `pip install lancedb`."
-        ) from e
-    return lancedb
+    return guard_import("lancedb")
 
 
 class LanceDB(VectorStore):
@@ -64,7 +57,7 @@ class LanceDB(VectorStore):
         mode: Optional[str] = "overwrite",
     ):
         """Initialize with Lance DB vectorstore"""
-        lancedb = import_lancedb()
+        lancedb = guard_import("lancedb")
         self._embedding = embedding
         self._vector_key = vector_key
         self._id_key = id_key
langchain_community/vectorstores/scann.py

@@ -9,6 +9,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 import numpy as np
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 from langchain_community.docstore.base import AddableMixin, Docstore
@@ -26,14 +27,7 @@ def dependable_scann_import() -> Any:
     """
     Import `scann` if available, otherwise raise error.
     """
-    try:
-        import scann
-    except ImportError:
-        raise ImportError(
-            "Could not import scann python package. "
-            "Please install it with `pip install scann` "
-        )
-    return scann
+    return guard_import("scann")
 
 
 class ScaNN(VectorStore):
@@ -312,7 +306,7 @@ class ScaNN(VectorStore):
         normalize_L2: bool = False,
         **kwargs: Any,
     ) -> ScaNN:
-        scann = dependable_scann_import()
+        scann = guard_import("scann")
         distance_strategy = kwargs.get(
             "distance_strategy", DistanceStrategy.EUCLIDEAN_DISTANCE
         )
@@ -494,7 +488,7 @@ class ScaNN(VectorStore):
         scann_path = path / "{index_name}.scann".format(index_name=index_name)
         scann_path.mkdir(exist_ok=True, parents=True)
         # load index separately since it is not picklable
-        scann = dependable_scann_import()
+        scann = guard_import("scann")
         index = scann.scann_ops_pybind.load_searcher(str(scann_path))
 
         # load docstore and index_to_docstore_id
langchain_community/vectorstores/tiledb.py

@@ -1,4 +1,5 @@
 """Wrapper around TileDB vector database."""
+
 from __future__ import annotations
 
 import pickle
@@ -9,6 +10,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
 import numpy as np
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 from langchain_community.vectorstores.utils import maximal_marginal_relevance
@@ -24,16 +26,10 @@ MAX_FLOAT = sys.float_info.max
 
 def dependable_tiledb_import() -> Any:
     """Import tiledb-vector-search if available, otherwise raise error."""
-    try:
-        import tiledb as tiledb
-        import tiledb.vector_search as tiledb_vs
-    except ImportError:
-        raise ImportError(
-            "Could not import tiledb-vector-search python package. "
-            "Please install it with `conda install -c tiledb tiledb-vector-search` "
-            "or `pip install tiledb-vector-search`"
-        )
-    return tiledb_vs, tiledb
+    return (
+        guard_import("tiledb.vector_search"),
+        guard_import("tiledb"),
+    )
 
 
 def get_vector_index_uri_from_group(group: Any) -> str:
@@ -115,7 +111,10 @@ class TileDB(VectorStore):
         self.metric = metric
         self.config = config
 
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb_vs, tiledb = (
+            guard_import("tiledb.vector_search"),
+            guard_import("tiledb"),
+        )
         with tiledb.scope_ctx(ctx_or_config=config):
             index_group = tiledb.Group(self.index_uri, "r")
             self.vector_index_uri = (
@@ -173,7 +172,7 @@ class TileDB(VectorStore):
         Returns:
             List of Documents and scores.
         """
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb = guard_import("tiledb")
         docs = []
         docs_array = tiledb.open(
             self.docs_array_uri, "r", timestamp=self.timestamp, config=self.config
@@ -477,7 +476,10 @@ class TileDB(VectorStore):
         metadatas: bool = True,
         config: Optional[Mapping[str, Any]] = None,
     ) -> None:
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb_vs, tiledb = (
+            guard_import("tiledb.vector_search"),
+            guard_import("tiledb"),
+        )
         with tiledb.scope_ctx(ctx_or_config=config):
             try:
                 tiledb.group_create(index_uri)
@@ -550,7 +552,10 @@ class TileDB(VectorStore):
                 f"Expected one of {list(INDEX_METRICS)}"
             )
         )
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb_vs, tiledb = (
+            guard_import("tiledb.vector_search"),
+            guard_import("tiledb"),
+        )
         input_vectors = np.array(embeddings).astype(np.float32)
         cls.create(
             index_uri=index_uri,
@@ -646,7 +651,7 @@ class TileDB(VectorStore):
         Returns:
             List of ids from adding the texts into the vectorstore.
         """
-        tiledb_vs, tiledb = dependable_tiledb_import()
+        tiledb = guard_import("tiledb")
         embeddings = self.embedding.embed_documents(list(texts))
         if ids is None:
             ids = [str(random.randint(0, MAX_UINT64 - 1)) for _ in texts]
langchain_community/vectorstores/usearch.py

@@ -5,6 +5,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
 import numpy as np
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
+from langchain_core.utils import guard_import
 from langchain_core.vectorstores import VectorStore
 
 from langchain_community.docstore.base import AddableMixin, Docstore
@@ -15,14 +16,7 @@ def dependable_usearch_import() -> Any:
     """
    Import usearch if available, otherwise raise error.
     """
-    try:
-        import usearch.index
-    except ImportError:
-        raise ImportError(
-            "Could not import usearch python package. "
-            "Please install it with `pip install usearch` "
-        )
-    return usearch.index
+    return guard_import("usearch.index")
 
 
 class USearch(VectorStore):
@@ -170,7 +164,7 @@ class USearch(VectorStore):
             documents.append(Document(page_content=text, metadata=metadata))
 
         docstore = InMemoryDocstore(dict(zip(ids, documents)))
-        usearch = dependable_usearch_import()
+        usearch = guard_import("usearch.index")
         index = usearch.Index(ndim=len(embeddings[0]), metric=metric)
         index.add(np.array(ids), np.array(embeddings))
         return cls(embedding, index, docstore, ids.tolist())