langchain: make numpy optional (#29182)
Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent 8c28742980
commit 3c96012f5e
Changed files:
libs/langchain/langchain/chains
libs/langchain/langchain/evaluation/embedding_distance
libs/langchain/langchain/retrievers/document_compressors
libs/langchain/tests/unit_tests
libs/langchain/uv.lock
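Note: the source changes below all apply the same optional-dependency pattern: try to import NumPy, fall back to a pure Python equivalent, and log a warning that points to "pip install numpy". A minimal, self-contained sketch of that pattern, illustrative only (the helper name _dot is hypothetical and not part of the diff):

import logging
from typing import Sequence

logger = logging.getLogger(__name__)


def _dot(a: Sequence[float], b: Sequence[float]) -> float:
    # Prefer NumPy when it is installed, otherwise fall back to pure Python.
    try:
        import numpy as np  # optional dependency

        return float(np.dot(a, b))
    except ImportError:
        logger.warning(
            "NumPy not found; using a pure Python fallback. "
            "Install it with pip install numpy for better performance."
        )
        return sum(x * y for x, y in zip(a, b))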
libs/langchain/langchain/chains/flare/base.py
@@ -1,9 +1,9 @@
 from __future__ import annotations
 
+import logging
 import re
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 
-import numpy as np
 from langchain_core.callbacks import (
     CallbackManagerForChainRun,
 )
@@ -23,6 +23,8 @@ from langchain.chains.flare.prompts import (
 )
 from langchain.chains.llm import LLMChain
 
+logger = logging.getLogger(__name__)
+
 
 def _extract_tokens_and_log_probs(response: AIMessage) -> Tuple[List[str], List[float]]:
     """Extract tokens and log probabilities from chat model response."""
@@ -57,7 +59,24 @@ def _low_confidence_spans(
     min_token_gap: int,
     num_pad_tokens: int,
 ) -> List[str]:
-    _low_idx = np.where(np.exp(log_probs) < min_prob)[0]
+    try:
+        import numpy as np
+
+        _low_idx = np.where(np.exp(log_probs) < min_prob)[0]
+    except ImportError:
+        logger.warning(
+            "NumPy not found in the current Python environment. FlareChain will use a "
+            "pure Python implementation for internal calculations, which may "
+            "significantly impact performance, especially for large datasets. For "
+            "optimal speed and efficiency, consider installing NumPy: pip install numpy"
+        )
+        import math
+
+        _low_idx = [  # type: ignore[assignment]
+            idx
+            for idx, log_prob in enumerate(log_probs)
+            if math.exp(log_prob) < min_prob
+        ]
     low_idx = [i for i in _low_idx if re.search(r"\w", tokens[i])]
     if len(low_idx) == 0:
         return []
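A quick way to sanity-check the FlareChain fallback above is that the NumPy path and the pure Python path should select the same low-probability token indices. A hedged sketch (log-prob values are invented for illustration; NumPy is imported here only to run the comparison):

import math

import numpy as np

log_probs = [-0.05, -2.3, -0.01, -1.9]  # example values, not from the diff
min_prob = 0.5

numpy_idx = np.where(np.exp(log_probs) < min_prob)[0].tolist()
pure_idx = [i for i, lp in enumerate(log_probs) if math.exp(lp) < min_prob]
assert numpy_idx == pure_idx == [1, 3]  # both branches agree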
libs/langchain/langchain/chains/hyde/base.py
@@ -5,9 +5,9 @@ https://arxiv.org/abs/2212.10496
 
 from __future__ import annotations
 
+import logging
 from typing import Any, Dict, List, Optional
 
-import numpy as np
 from langchain_core.callbacks import CallbackManagerForChainRun
 from langchain_core.embeddings import Embeddings
 from langchain_core.language_models import BaseLanguageModel
@@ -20,6 +20,8 @@ from langchain.chains.base import Chain
 from langchain.chains.hyde.prompts import PROMPT_MAP
 from langchain.chains.llm import LLMChain
 
+logger = logging.getLogger(__name__)
+
 
 class HypotheticalDocumentEmbedder(Chain, Embeddings):
     """Generate hypothetical document for query, and then embed that.
@@ -54,7 +56,22 @@ class HypotheticalDocumentEmbedder(Chain, Embeddings):
 
     def combine_embeddings(self, embeddings: List[List[float]]) -> List[float]:
         """Combine embeddings into final embeddings."""
-        return list(np.array(embeddings).mean(axis=0))
+        try:
+            import numpy as np
+
+            return list(np.array(embeddings).mean(axis=0))
+        except ImportError:
+            logger.warning(
+                "NumPy not found in the current Python environment. "
+                "HypotheticalDocumentEmbedder will use a pure Python implementation "
+                "for internal calculations, which may significantly impact "
+                "performance, especially for large datasets. For optimal speed and "
+                "efficiency, consider installing NumPy: pip install numpy"
+            )
+            if not embeddings:
+                return []
+            num_vectors = len(embeddings)
+            return [sum(dim_values) / num_vectors for dim_values in zip(*embeddings)]
 
     def embed_query(self, text: str) -> List[float]:
         """Generate a hypothetical document and embedded it."""
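The combine_embeddings fallback above replaces np.array(embeddings).mean(axis=0) with a column-wise mean over zipped vectors. A small hedged check of that equivalence (embedding values invented for illustration):

import numpy as np  # used here only to compare against the fallback

embeddings = [[1.0, 2.0, 3.0], [3.0, 4.0, 5.0]]  # toy vectors
num_vectors = len(embeddings)

with_numpy = list(np.array(embeddings).mean(axis=0))
pure_python = [sum(dims) / num_vectors for dims in zip(*embeddings)]
assert with_numpy == pure_python == [2.0, 3.0, 4.0]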
libs/langchain/langchain/evaluation/embedding_distance/base.py
@@ -1,9 +1,11 @@
 """A chain for comparing the output of two models using embeddings."""
 
+import functools
+import logging
 from enum import Enum
+from importlib import util
 from typing import Any, Dict, List, Optional
 
-import numpy as np
 from langchain_core.callbacks.manager import (
     AsyncCallbackManagerForChainRun,
     CallbackManagerForChainRun,
@@ -18,6 +20,34 @@ from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator
 from langchain.schema import RUN_KEY
 
 
+def _import_numpy() -> Any:
+    try:
+        import numpy as np
+
+        return np
+    except ImportError as e:
+        raise ImportError(
+            "Could not import numpy, please install with `pip install numpy`."
+        ) from e
+
+
+logger = logging.getLogger(__name__)
+
+
+@functools.lru_cache(maxsize=1)
+def _check_numpy() -> bool:
+    if bool(util.find_spec("numpy")):
+        return True
+    logger.warning(
+        "NumPy not found in the current Python environment. "
+        "langchain will use a pure Python implementation for embedding distance "
+        "operations, which may significantly impact performance, especially for large "
+        "datasets. For optimal speed and efficiency, consider installing NumPy: "
+        "pip install numpy"
+    )
+    return False
+
+
 def _embedding_factory() -> Embeddings:
     """Create an Embeddings object.
 
     Returns:
@@ -158,7 +188,7 @@ class _EmbeddingDistanceChainMixin(Chain):
         raise ValueError(f"Invalid metric: {metric}")
 
     @staticmethod
-    def _cosine_distance(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+    def _cosine_distance(a: Any, b: Any) -> Any:
        """Compute the cosine distance between two vectors.

        Args:
@@ -179,7 +209,7 @@ class _EmbeddingDistanceChainMixin(Chain):
         return 1.0 - cosine_similarity(a, b)
 
     @staticmethod
-    def _euclidean_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
+    def _euclidean_distance(a: Any, b: Any) -> Any:
        """Compute the Euclidean distance between two vectors.

        Args:
@@ -189,10 +219,15 @@ class _EmbeddingDistanceChainMixin(Chain):
         Returns:
             np.floating: The Euclidean distance.
         """
-        return np.linalg.norm(a - b)
+        if _check_numpy():
+            import numpy as np
+
+            return np.linalg.norm(a - b)
+
+        return sum((x - y) * (x - y) for x, y in zip(a, b)) ** 0.5
 
     @staticmethod
-    def _manhattan_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
+    def _manhattan_distance(a: Any, b: Any) -> Any:
        """Compute the Manhattan distance between two vectors.

        Args:
@@ -202,10 +237,14 @@ class _EmbeddingDistanceChainMixin(Chain):
         Returns:
             np.floating: The Manhattan distance.
         """
-        return np.sum(np.abs(a - b))
+        if _check_numpy():
+            np = _import_numpy()
+            return np.sum(np.abs(a - b))
+
+        return sum(abs(x - y) for x, y in zip(a, b))
 
     @staticmethod
-    def _chebyshev_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
+    def _chebyshev_distance(a: Any, b: Any) -> Any:
        """Compute the Chebyshev distance between two vectors.

        Args:
@@ -215,10 +254,14 @@ class _EmbeddingDistanceChainMixin(Chain):
         Returns:
             np.floating: The Chebyshev distance.
         """
-        return np.max(np.abs(a - b))
+        if _check_numpy():
+            np = _import_numpy()
+            return np.max(np.abs(a - b))
+
+        return max(abs(x - y) for x, y in zip(a, b))
 
     @staticmethod
-    def _hamming_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
+    def _hamming_distance(a: Any, b: Any) -> Any:
        """Compute the Hamming distance between two vectors.

        Args:
@@ -228,9 +271,13 @@ class _EmbeddingDistanceChainMixin(Chain):
         Returns:
             np.floating: The Hamming distance.
         """
-        return np.mean(a != b)
+        if _check_numpy():
+            np = _import_numpy()
+            return np.mean(a != b)
+
+        return sum(1 for x, y in zip(a, b) if x != y) / len(a)
 
-    def _compute_score(self, vectors: np.ndarray) -> float:
+    def _compute_score(self, vectors: Any) -> float:
         """Compute the score based on the distance metric.
 
         Args:
@@ -240,8 +287,11 @@ class _EmbeddingDistanceChainMixin(Chain):
             float: The computed score.
         """
         metric = self._get_metric(self.distance_metric)
-        score = metric(vectors[0].reshape(1, -1), vectors[1].reshape(1, -1)).item()
-        return score
+        if _check_numpy() and isinstance(vectors, _import_numpy().ndarray):
+            score = metric(vectors[0].reshape(1, -1), vectors[1].reshape(1, -1)).item()
+        else:
+            score = metric(vectors[0], vectors[1])
+        return float(score)
 
 
 class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
@@ -292,9 +342,12 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
         Returns:
             Dict[str, Any]: The computed score.
         """
-        vectors = np.array(
-            self.embeddings.embed_documents([inputs["prediction"], inputs["reference"]])
+        vectors = self.embeddings.embed_documents(
+            [inputs["prediction"], inputs["reference"]]
         )
+        if _check_numpy():
+            np = _import_numpy()
+            vectors = np.array(vectors)
         score = self._compute_score(vectors)
         return {"score": score}
 
@@ -313,13 +366,15 @@ class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
         Returns:
             Dict[str, Any]: The computed score.
         """
-        embedded = await self.embeddings.aembed_documents(
+        vectors = await self.embeddings.aembed_documents(
             [
                 inputs["prediction"],
                 inputs["reference"],
             ]
         )
-        vectors = np.array(embedded)
+        if _check_numpy():
+            np = _import_numpy()
+            vectors = np.array(vectors)
         score = self._compute_score(vectors)
         return {"score": score}
 
@@ -432,14 +487,15 @@ class PairwiseEmbeddingDistanceEvalChain(
         Returns:
             Dict[str, Any]: The computed score.
         """
-        vectors = np.array(
-            self.embeddings.embed_documents(
-                [
-                    inputs["prediction"],
-                    inputs["prediction_b"],
-                ]
-            )
+        vectors = self.embeddings.embed_documents(
+            [
+                inputs["prediction"],
+                inputs["prediction_b"],
+            ]
         )
+        if _check_numpy():
+            np = _import_numpy()
+            vectors = np.array(vectors)
         score = self._compute_score(vectors)
         return {"score": score}
 
@@ -458,13 +514,15 @@ class PairwiseEmbeddingDistanceEvalChain(
         Returns:
             Dict[str, Any]: The computed score.
         """
-        embedded = await self.embeddings.aembed_documents(
+        vectors = await self.embeddings.aembed_documents(
             [
                 inputs["prediction"],
                 inputs["prediction_b"],
             ]
         )
-        vectors = np.array(embedded)
+        if _check_numpy():
+            np = _import_numpy()
+            vectors = np.array(vectors)
         score = self._compute_score(vectors)
         return {"score": score}
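The evaluator changes above gate the distance computations on a cached capability check and keep a pure Python branch next to the NumPy one. A hedged sketch of both ideas (the function name has_numpy and the vectors are illustrative, not from the diff):

import functools
from importlib import util

import numpy as np  # installed here so the two branches can be compared


@functools.lru_cache(maxsize=1)
def has_numpy() -> bool:
    # Cached so the module lookup (and any warning) runs at most once per process.
    return util.find_spec("numpy") is not None


a = [1.0, 4.0, -2.0]
b = [0.5, 1.0, 3.0]

manhattan_np = float(np.sum(np.abs(np.array(a) - np.array(b))))
manhattan_py = sum(abs(x - y) for x, y in zip(a, b))
assert has_numpy() and manhattan_np == manhattan_py == 8.5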
libs/langchain/langchain/retrievers/document_compressors/embeddings_filter.py
@@ -1,6 +1,5 @@
 from typing import Callable, Dict, Optional, Sequence
 
-import numpy as np
 from langchain_core.callbacks.manager import Callbacks
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
@@ -69,6 +68,13 @@ class EmbeddingsFilter(BaseDocumentCompressor):
                 "To use please install langchain-community "
                 "with `pip install langchain-community`."
             )
+
+        try:
+            import numpy as np
+        except ImportError as e:
+            raise ImportError(
+                "Could not import numpy, please install with `pip install numpy`."
+            ) from e
         stateful_documents = get_stateful_documents(documents)
         embedded_documents = _get_embeddings_from_stateful_docs(
             self.embeddings, stateful_documents
@@ -104,6 +110,13 @@ class EmbeddingsFilter(BaseDocumentCompressor):
                 "To use please install langchain-community "
                 "with `pip install langchain-community`."
             )
+
+        try:
+            import numpy as np
+        except ImportError as e:
+            raise ImportError(
+                "Could not import numpy, please install with `pip install numpy`."
+            ) from e
         stateful_documents = get_stateful_documents(documents)
         embedded_documents = await _aget_embeddings_from_stateful_docs(
             self.embeddings, stateful_documents
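Unlike the chains above, EmbeddingsFilter keeps NumPy as a hard requirement and only defers the import to call time, so importing the module no longer pulls in NumPy. A minimal sketch of that guarded-import idiom (the function name _require_numpy is hypothetical):

from typing import Any


def _require_numpy() -> Any:
    # Deferred import: the dependency is only demanded when the feature actually runs.
    try:
        import numpy as np
    except ImportError as e:
        raise ImportError(
            "Could not import numpy, please install with pip install numpy."
        ) from e
    return np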
libs/langchain/poetry.lock (generated, 5067 lines)
File diff suppressed because it is too large
libs/langchain/pyproject.toml
@@ -14,8 +14,6 @@ dependencies = [
     "SQLAlchemy<3,>=1.4",
     "requests<3,>=2",
     "PyYAML>=5.3",
-    "numpy<2,>=1.26.4; python_version < \"3.12\"",
-    "numpy<3,>=1.26.2; python_version >= \"3.12\"",
     "async-timeout<5.0.0,>=4.0.0; python_version < \"3.11\"",
 ]
 name = "langchain"
@@ -74,6 +72,7 @@ test = [
     "langchain-openai",
     "toml>=0.10.2",
     "packaging>=24.2",
+    "numpy<3,>=1.26.4",
 ]
 codespell = ["codespell<3.0.0,>=2.2.0"]
 test_integration = [
@@ -102,6 +101,7 @@ typing = [
     "mypy-protobuf<4.0.0,>=3.0.0",
     "langchain-core",
     "langchain-text-splitters",
+    "numpy<3,>=1.26.4",
 ]
 dev = [
     "jupyter<2.0.0,>=1.0.0",
libs/langchain/tests/unit_tests/test_dependencies.py
@@ -37,7 +37,6 @@ def test_required_dependencies(uv_conf: Mapping[str, Any]) -> None:
         "langchain-core",
         "langchain-text-splitters",
         "langsmith",
-        "numpy",
         "pydantic",
         "requests",
     ]
@@ -82,5 +81,6 @@ def test_test_group_dependencies(uv_conf: Mapping[str, Any]) -> None:
             "requests-mock",
             # TODO: temporary hack since cffi 1.17.1 doesn't work with py 3.9.
             "cffi",
+            "numpy",
         ]
     )
libs/langchain/uv.lock
@@ -2247,8 +2247,6 @@ dependencies = [
     { name = "langchain-core" },
     { name = "langchain-text-splitters" },
     { name = "langsmith" },
-    { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
-    { name = "numpy", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" },
     { name = "pydantic" },
     { name = "pyyaml" },
     { name = "requests" },
@@ -2329,6 +2327,8 @@ test = [
     { name = "langchain-tests" },
     { name = "langchain-text-splitters" },
     { name = "lark" },
+    { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
+    { name = "numpy", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" },
     { name = "packaging" },
     { name = "pandas" },
     { name = "pytest" },
@@ -2359,6 +2359,8 @@ typing = [
     { name = "langchain-text-splitters" },
     { name = "mypy" },
     { name = "mypy-protobuf" },
+    { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
+    { name = "numpy", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" },
     { name = "types-chardet" },
     { name = "types-pytz" },
     { name = "types-pyyaml" },
@@ -2389,8 +2391,6 @@ requires-dist = [
     { name = "langchain-together", marker = "extra == 'together'" },
     { name = "langchain-xai", marker = "extra == 'xai'" },
     { name = "langsmith", specifier = ">=0.1.17,<0.4" },
-    { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.26.4,<2" },
-    { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.2,<3" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
     { name = "pyyaml", specifier = ">=5.3" },
     { name = "requests", specifier = ">=2,<3" },
@@ -2422,6 +2422,7 @@ test = [
     { name = "langchain-tests", editable = "../standard-tests" },
     { name = "langchain-text-splitters", editable = "../text-splitters" },
     { name = "lark", specifier = ">=1.1.5,<2.0.0" },
+    { name = "numpy", specifier = ">=1.26.4,<3" },
     { name = "packaging", specifier = ">=24.2" },
     { name = "pandas", specifier = ">=2.0.0,<3.0.0" },
     { name = "pytest", specifier = ">=8,<9" },
@@ -2452,6 +2453,7 @@ typing = [
     { name = "langchain-text-splitters", editable = "../text-splitters" },
     { name = "mypy", specifier = ">=1.10,<2.0" },
     { name = "mypy-protobuf", specifier = ">=3.0.0,<4.0.0" },
+    { name = "numpy", specifier = ">=1.26.4,<3" },
     { name = "types-chardet", specifier = ">=5.0.4.6,<6.0.0.0" },
     { name = "types-pytz", specifier = ">=2023.3.0.0,<2024.0.0.0" },
     { name = "types-pyyaml", specifier = ">=6.0.12.2,<7.0.0.0" },