feat: port various nit changes from wip-v0.4 (#32506)

Lots of work that wasn't directly related to the core improvements, messages, or testing functionality.
2025-09-08 06:23:20 +00:00 · 2025-08-11 15:09:08 -04:00
parent 7db9e60601
commit ee4c2510eb
63 changed files with 2213 additions and 2862 deletions
--- a/libs/langchain/langchain/evaluation/embedding_distance/base.py
+++ b/libs/langchain/langchain/evaluation/embedding_distance/base.py
@@ -201,15 +201,41 @@ class _EmbeddingDistanceChainMixin(Chain):
            np.ndarray: The cosine distance.
        """
        try:
-            from langchain_community.utils.math import cosine_similarity
-        except ImportError as e:
-            msg = (
-                "The cosine_similarity function is required to compute cosine distance."
-                " Please install the langchain-community package using"
-                " `pip install langchain-community`."
-            )
-            raise ImportError(msg) from e
-        return 1.0 - cosine_similarity(a, b)
+            from langchain_core.vectorstores.utils import _cosine_similarity
+
+            return 1.0 - _cosine_similarity(a, b)
+        except ImportError:
+            # Fallback to scipy if available
+            try:
+                from scipy.spatial.distance import cosine
+
+                return cosine(a.flatten(), b.flatten())
+            except ImportError:
+                # Pure numpy fallback
+                if _check_numpy():
+                    np = _import_numpy()
+                    a_flat = a.flatten()
+                    b_flat = b.flatten()
+                    dot_product = np.dot(a_flat, b_flat)
+                    norm_a = np.linalg.norm(a_flat)
+                    norm_b = np.linalg.norm(b_flat)
+                    if norm_a == 0 or norm_b == 0:
+                        return 0.0
+                    return 1.0 - (dot_product / (norm_a * norm_b))
+                # Pure Python implementation
+                a_flat = a if hasattr(a, "__len__") else [a]
+                b_flat = b if hasattr(b, "__len__") else [b]
+                if hasattr(a, "flatten"):
+                    a_flat = a.flatten()
+                if hasattr(b, "flatten"):
+                    b_flat = b.flatten()
+
+                dot_product = sum(x * y for x, y in zip(a_flat, b_flat))
+                norm_a = sum(x * x for x in a_flat) ** 0.5
+                norm_b = sum(x * x for x in b_flat) ** 0.5
+                if norm_a == 0 or norm_b == 0:
+                    return 0.0
+                return 1.0 - (dot_product / (norm_a * norm_b))

    @staticmethod
    def _euclidean_distance(a: Any, b: Any) -> Any:
@@ -222,12 +248,17 @@ class _EmbeddingDistanceChainMixin(Chain):
        Returns:
            np.floating: The Euclidean distance.
        """
-        if _check_numpy():
-            import numpy as np
+        try:
+            from scipy.spatial.distance import euclidean

-            return np.linalg.norm(a - b)
+            return euclidean(a.flatten(), b.flatten())
+        except ImportError:
+            if _check_numpy():
+                import numpy as np

-        return sum((x - y) * (x - y) for x, y in zip(a, b)) ** 0.5
+                return np.linalg.norm(a - b)
+
+            return sum((x - y) * (x - y) for x, y in zip(a, b)) ** 0.5

    @staticmethod
    def _manhattan_distance(a: Any, b: Any) -> Any:
@@ -240,11 +271,16 @@ class _EmbeddingDistanceChainMixin(Chain):
        Returns:
            np.floating: The Manhattan distance.
        """
-        if _check_numpy():
-            np = _import_numpy()
-            return np.sum(np.abs(a - b))
+        try:
+            from scipy.spatial.distance import cityblock

-        return sum(abs(x - y) for x, y in zip(a, b))
+            return cityblock(a.flatten(), b.flatten())
+        except ImportError:
+            if _check_numpy():
+                np = _import_numpy()
+                return np.sum(np.abs(a - b))
+
+            return sum(abs(x - y) for x, y in zip(a, b))

    @staticmethod
    def _chebyshev_distance(a: Any, b: Any) -> Any:
@@ -257,11 +293,16 @@ class _EmbeddingDistanceChainMixin(Chain):
        Returns:
            np.floating: The Chebyshev distance.
        """
-        if _check_numpy():
-            np = _import_numpy()
-            return np.max(np.abs(a - b))
+        try:
+            from scipy.spatial.distance import chebyshev

-        return max(abs(x - y) for x, y in zip(a, b))
+            return chebyshev(a.flatten(), b.flatten())
+        except ImportError:
+            if _check_numpy():
+                np = _import_numpy()
+                return np.max(np.abs(a - b))
+
+            return max(abs(x - y) for x, y in zip(a, b))

    @staticmethod
    def _hamming_distance(a: Any, b: Any) -> Any:
@@ -274,11 +315,16 @@ class _EmbeddingDistanceChainMixin(Chain):
        Returns:
            np.floating: The Hamming distance.
        """
-        if _check_numpy():
-            np = _import_numpy()
-            return np.mean(a != b)
+        try:
+            from scipy.spatial.distance import hamming

-        return sum(1 for x, y in zip(a, b) if x != y) / len(a)
+            return hamming(a.flatten(), b.flatten())
+        except ImportError:
+            if _check_numpy():
+                np = _import_numpy()
+                return np.mean(a != b)
+
+            return sum(1 for x, y in zip(a, b) if x != y) / len(a)

    def _compute_score(self, vectors: Any) -> float:
        """Compute the score based on the distance metric.
--- a/libs/langchain/pyproject.toml
+++ b/libs/langchain/pyproject.toml
@@ -25,9 +25,9 @@ readme = "README.md"
 community = ["langchain-community"]
 anthropic = ["langchain-anthropic"]
 openai = ["langchain-openai"]
-azure-ai = ["langchain-azure-ai"]
-cohere = ["langchain-cohere"]
-google-vertexai = ["langchain-google-vertexai"]
+# azure-ai = ["langchain-azure-ai"]
+# cohere = ["langchain-cohere"]
+# google-vertexai = ["langchain-google-vertexai"]
 google-genai = ["langchain-google-genai"]
 fireworks = ["langchain-fireworks"]
 ollama = ["langchain-ollama"]
@@ -35,9 +35,9 @@ together = ["langchain-together"]
 mistralai = ["langchain-mistralai"]
 huggingface = ["langchain-huggingface"]
 groq = ["langchain-groq"]
-aws = ["langchain-aws"]
+# aws = ["langchain-aws"]
 deepseek = ["langchain-deepseek"]
-xai = ["langchain-xai"]
+# xai = ["langchain-xai"]
 perplexity = ["langchain-perplexity"]

 [project.urls]
--- a/libs/langchain/tests/unit_tests/agents/format_scratchpad/test_openai_tools.py
+++ b/libs/langchain/tests/unit_tests/agents/format_scratchpad/test_openai_tools.py
@@ -53,7 +53,12 @@ def test_calls_convert_agent_action_to_messages() -> None:
    message4 = AIMessage(
        content="",
        tool_calls=[
-            ToolCall(name="exponentiate", args={"a": 3, "b": 5}, id="call_abc02468"),
+            ToolCall(
+                name="exponentiate",
+                args={"a": 3, "b": 5},
+                id="call_abc02468",
+                type="tool_call",
+            ),
        ],
    )
    actions4 = parse_ai_message_to_openai_tool_action(message4)
--- a/libs/langchain/tests/unit_tests/agents/test_agent.py
+++ b/libs/langchain/tests/unit_tests/agents/test_agent.py
@@ -1008,7 +1008,7 @@ def _make_tools_invocation(name_to_arguments: dict[str, dict[str, Any]]) -> AIMe
        for idx, (name, arguments) in enumerate(name_to_arguments.items())
    ]
    tool_calls = [
-        ToolCall(name=name, args=args, id=str(idx))
+        ToolCall(name=name, args=args, id=str(idx), type="tool_call")
        for idx, (name, args) in enumerate(name_to_arguments.items())
    ]
    return AIMessage(
--- a/libs/langchain/uv.lock
+++ b/libs/langchain/uv.lock