From 1acfe8635382faaa7598c1fc7b6486357c986c8d Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 9 Oct 2023 14:56:55 -0700 Subject: [PATCH] Accelerating Math Utils with SimSIMD (#11566) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LangChain relies on NumPy to compute cosine distances, which becomes a bottleneck with the growing dimensionality and number of embeddings. To avoid this bottleneck, in our libraries at [Unum](https://github.com/unum-cloud), we have created a specialized package - [SimSIMD](https://github.com/ashvardanian/simsimd), that knows how to use newer hardware capabilities. Compared to SciPy and NumPy, it reaches 3x-200x performance for various data types. Since publication, several LangChain users have asked me if I can integrate it into LangChain to accelerate their workflows, so here I am 🤗 ## Benchmarking To conduct benchmarks locally, run this in your Jupyter: ```py import numpy as np import scipy as sp import simsimd as simd import timeit as tt def cosine_similarity_np(X: np.ndarray, Y: np.ndarray) -> np.ndarray: X_norm = np.linalg.norm(X, axis=1) Y_norm = np.linalg.norm(Y, axis=1) with np.errstate(divide="ignore", invalid="ignore"): similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm) similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0 return similarity def cosine_similarity_sp(X: np.ndarray, Y: np.ndarray) -> np.ndarray: return 1 - sp.spatial.distance.cdist(X, Y, metric='cosine') def cosine_similarity_simd(X: np.ndarray, Y: np.ndarray) -> np.ndarray: return 1 - simd.cdist(X, Y, metric='cosine') X = np.random.randn(1, 1536).astype(np.float32) Y = np.random.randn(1, 1536).astype(np.float32) repeat = 1000 print("NumPy: {:,.0f} ops/s, SciPy: {:,.0f} ops/s, SimSIMD: {:,.0f} ops/s".format( repeat / tt.timeit(lambda: cosine_similarity_np(X, Y), number=repeat), repeat / tt.timeit(lambda: cosine_similarity_sp(X, Y), number=repeat), repeat / 
tt.timeit(lambda: cosine_similarity_simd(X, Y), number=repeat), )) ``` ## Results I ran this on an M2 Pro MacBook for various data types and different numbers of rows in `X` and reformatted the results as a table for readability: | Data Type | NumPy | SciPy | SimSIMD | | :--- | ---: | ---: | ---: | | `f32, 1` | 59,114 ops/s | 80,330 ops/s | 475,351 ops/s | | `f16, 1` | 32,880 ops/s | 82,420 ops/s | 650,177 ops/s | | `i8, 1` | 47,916 ops/s | 115,084 ops/s | 866,958 ops/s | | `f32, 10` | 40,135 ops/s | 24,305 ops/s | 185,373 ops/s | | `f16, 10` | 7,041 ops/s | 17,596 ops/s | 192,058 ops/s | | `i8, 10` | 21,989 ops/s | 25,064 ops/s | 619,131 ops/s | | `f32, 100` | 3,536 ops/s | 3,094 ops/s | 24,206 ops/s | | `f16, 100` | 900 ops/s | 2,014 ops/s | 23,364 ops/s | | `i8, 100` | 5,510 ops/s | 3,214 ops/s | 143,922 ops/s | It's important to note that SimSIMD will underperform if both matrices are huge. That, however, seems to be an uncommon usage pattern for LangChain users. You can find a much more detailed performance report for different hardware models here: - [Apple M2 Pro](https://ashvardanian.com/posts/simsimd-faster-scipy/#appendix-1-performance-on-apple-m2-pro). - [4th Gen Intel Xeon Platinum](https://ashvardanian.com/posts/simsimd-faster-scipy/#appendix-2-performance-on-4th-gen-intel-xeon-platinum-8480). - [AWS Graviton 3](https://ashvardanian.com/posts/simsimd-faster-scipy/#appendix-3-performance-on-aws-graviton-3). ## Additional Notes 1. Previous version used `X = np.array(X)`, to repackage lists of lists. It's an anti-pattern, as it will use double-precision floating-point numbers, which are slow on both CPUs and GPUs. I have replaced it with `X = np.array(X, dtype=np.float32)`, but a more selective approach should be discussed. 2. In numerical computations, it's recommended to explicitly define tolerance levels, which were previously avoided in `np.allclose(expected, actual)` calls. 
For now, I've set absolute tolerance to distance computation errors as 0.01: `np.allclose(expected, actual, atol=1e-2)`. --- - **Dependencies:** adds `simsimd` dependency - **Tag maintainer:** @hwchase17 - **Twitter handle:** @ashvardanian --------- Co-authored-by: Bagatur --- libs/langchain/langchain/utils/math.py | 31 ++++++++++++++----- .../test_math.py} | 0 2 files changed, 24 insertions(+), 7 deletions(-) rename libs/langchain/tests/unit_tests/{test_math_utils.py => utils/test_math.py} (100%) diff --git a/libs/langchain/langchain/utils/math.py b/libs/langchain/langchain/utils/math.py index 77784ba2a49..99d47368197 100644 --- a/libs/langchain/langchain/utils/math.py +++ b/libs/langchain/langchain/utils/math.py @@ -1,8 +1,11 @@ """Math utils.""" +import logging from typing import List, Optional, Tuple, Union import numpy as np +logger = logging.getLogger(__name__) + Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray] @@ -10,6 +13,7 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: """Row-wise cosine similarity between two equal-width matrices.""" if len(X) == 0 or len(Y) == 0: return np.array([]) + X = np.array(X) Y = np.array(Y) if X.shape[1] != Y.shape[1]: @@ -17,14 +21,27 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: f"Number of columns in X and Y must be the same. X has shape {X.shape} " f"and Y has shape {Y.shape}." ) + try: + import simsimd as simd - X_norm = np.linalg.norm(X, axis=1) - Y_norm = np.linalg.norm(Y, axis=1) - # Ignore divide by zero errors run time warnings as those are handled below. 
- with np.errstate(divide="ignore", invalid="ignore"): - similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm) - similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0 - return similarity + X = np.array(X, dtype=np.float32) + Y = np.array(Y, dtype=np.float32) + Z = 1 - simd.cdist(X, Y, metric="cosine") + if isinstance(Z, float): + return np.array([Z]) + return Z + except ImportError: + logger.info( + "Unable to import simsimd, defaulting to NumPy implementation. If you want " + "to use simsimd please install with `pip install simsimd`." + ) + X_norm = np.linalg.norm(X, axis=1) + Y_norm = np.linalg.norm(Y, axis=1) + # Ignore divide by zero errors run time warnings as those are handled below. + with np.errstate(divide="ignore", invalid="ignore"): + similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm) + similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0 + return similarity def cosine_similarity_top_k( diff --git a/libs/langchain/tests/unit_tests/test_math_utils.py b/libs/langchain/tests/unit_tests/utils/test_math.py similarity index 100% rename from libs/langchain/tests/unit_tests/test_math_utils.py rename to libs/langchain/tests/unit_tests/utils/test_math.py