From 1acfe8635382faaa7598c1fc7b6486357c986c8d Mon Sep 17 00:00:00 2001
From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>
Date: Mon, 9 Oct 2023 14:56:55 -0700
Subject: [PATCH] Accelerating Math Utils with SimSIMD (#11566)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LangChain relies on NumPy to compute cosine distances, which becomes a
bottleneck with the growing dimensionality and number of embeddings. To
avoid this bottleneck, in our libraries at
[Unum](https://github.com/unum-cloud), we have created a specialized
package - [SimSIMD](https://github.com/ashvardanian/simsimd), that knows
how to use newer hardware capabilities. Compared to SciPy and NumPy, it
reaches 3x-200x performance for various data types. Since publication,
several LangChain users have asked me if I can integrate it into
LangChain to accelerate their workflows, so here I am 🤗

## Benchmarking

To conduct benchmarks locally, run this in your Jupyter:

```py
import numpy as np
import scipy as sp
import simsimd as simd
import timeit as tt

def cosine_similarity_np(X: np.ndarray, Y: np.ndarray) -> np.ndarray:
    """NumPy baseline: pairwise cosine similarity between the rows of X and Y.

    Entries that come out as NaN or infinite (for example when a row has
    zero norm) are replaced with 0.0 before the matrix is returned.
    """
    row_norms_x = np.linalg.norm(X, axis=1)
    row_norms_y = np.linalg.norm(Y, axis=1)
    # Zero-norm rows cause division by zero; silence the warnings here and
    # patch the affected entries afterwards.
    with np.errstate(divide="ignore", invalid="ignore"):
        denominator = np.outer(row_norms_x, row_norms_y)
        result = np.dot(X, Y.T) / denominator
    non_finite = ~np.isfinite(result)
    result[non_finite] = 0.0
    return result

def cosine_similarity_sp(X: np.ndarray, Y: np.ndarray) -> np.ndarray:
    """SciPy baseline: one minus the pairwise cosine distance from `cdist`."""
    distances = sp.spatial.distance.cdist(X, Y, metric='cosine')
    return 1 - distances

def cosine_similarity_simd(X: np.ndarray, Y: np.ndarray) -> np.ndarray:
    """SimSIMD variant: one minus the cosine distance from `simd.cdist`."""
    distances = simd.cdist(X, Y, metric='cosine')
    return 1 - distances

X = np.random.randn(1, 1536).astype(np.float32)
Y = np.random.randn(1, 1536).astype(np.float32)
repeat = 1000

print("NumPy: {:,.0f} ops/s, SciPy: {:,.0f} ops/s, SimSIMD: {:,.0f} ops/s".format(
    repeat / tt.timeit(lambda: cosine_similarity_np(X, Y), number=repeat),
    repeat / tt.timeit(lambda: cosine_similarity_sp(X, Y), number=repeat),
    repeat / tt.timeit(lambda: cosine_similarity_simd(X, Y), number=repeat),
))
```

## Results

I ran this on an M2 Pro MacBook for various data types and different
numbers of rows in `X` and reformatted the results as a table for
readability:

| Data Type | NumPy | SciPy | SimSIMD |
| :--- | ---: | ---: | ---: |
| `f32, 1` | 59,114 ops/s | 80,330 ops/s | 475,351 ops/s |
| `f16, 1` | 32,880 ops/s | 82,420 ops/s | 650,177 ops/s |
| `i8, 1` | 47,916 ops/s | 115,084 ops/s | 866,958 ops/s |
| `f32, 10` | 40,135 ops/s | 24,305 ops/s | 185,373 ops/s |
| `f16, 10` | 7,041 ops/s | 17,596 ops/s | 192,058 ops/s |
| `i8, 10` | 21,989 ops/s | 25,064 ops/s | 619,131 ops/s |
| `f32, 100` | 3,536 ops/s | 3,094 ops/s | 24,206 ops/s |
| `f16, 100` | 900 ops/s | 2,014 ops/s | 23,364 ops/s |
| `i8, 100` | 5,510 ops/s | 3,214 ops/s | 143,922 ops/s |

It's important to note that SimSIMD will underperform if both matrices
are huge.
That, however, seems to be an uncommon usage pattern for LangChain
users.
You can find a much more detailed performance report for different
hardware models here:

- [Apple M2
Pro](https://ashvardanian.com/posts/simsimd-faster-scipy/#appendix-1-performance-on-apple-m2-pro).
- [4th Gen Intel Xeon
Platinum](https://ashvardanian.com/posts/simsimd-faster-scipy/#appendix-2-performance-on-4th-gen-intel-xeon-platinum-8480).
- [AWS Graviton
3](https://ashvardanian.com/posts/simsimd-faster-scipy/#appendix-3-performance-on-aws-graviton-3).

## Additional Notes

1. The previous version used `X = np.array(X)` to repackage lists of lists.
It's an anti-pattern, as it will use double-precision floating-point
numbers, which are slow on both CPUs and GPUs. I have replaced it with
`X = np.array(X, dtype=np.float32)`, but a more selective approach
should be discussed.
2. In numerical computations, it's recommended to explicitly define
tolerance levels, which were previously avoided in
`np.allclose(expected, actual)` calls. For now, I've set absolute
tolerance to distance computation errors as 0.01: `np.allclose(expected,
actual, atol=1e-2)`.

---

  - **Dependencies:** adds `simsimd` dependency
  - **Tag maintainer:** @hwchase17
  - **Twitter handle:** @ashvardanian

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
---
 libs/langchain/langchain/utils/math.py        | 31 ++++++++++++++-----
 .../test_math.py}                             |  0
 2 files changed, 24 insertions(+), 7 deletions(-)
 rename libs/langchain/tests/unit_tests/{test_math_utils.py => utils/test_math.py} (100%)

diff --git a/libs/langchain/langchain/utils/math.py b/libs/langchain/langchain/utils/math.py
index 77784ba2a49..99d47368197 100644
--- a/libs/langchain/langchain/utils/math.py
+++ b/libs/langchain/langchain/utils/math.py
@@ -1,8 +1,11 @@
 """Math utils."""
+import logging
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
 
+logger = logging.getLogger(__name__)
+
 Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]
 
 
@@ -10,6 +13,7 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
     """Row-wise cosine similarity between two equal-width matrices."""
     if len(X) == 0 or len(Y) == 0:
         return np.array([])
+
     X = np.array(X)
     Y = np.array(Y)
     if X.shape[1] != Y.shape[1]:
@@ -17,14 +21,27 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
             f"Number of columns in X and Y must be the same. X has shape {X.shape} "
             f"and Y has shape {Y.shape}."
         )
+    try:
+        import simsimd as simd
 
-    X_norm = np.linalg.norm(X, axis=1)
-    Y_norm = np.linalg.norm(Y, axis=1)
-    # Ignore divide by zero errors run time warnings as those are handled below.
-    with np.errstate(divide="ignore", invalid="ignore"):
-        similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm)
-    similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0
-    return similarity
+        X = np.array(X, dtype=np.float32)
+        Y = np.array(Y, dtype=np.float32)
+        Z = 1 - simd.cdist(X, Y, metric="cosine")
+        if isinstance(Z, float):
+            return np.array([Z])
+        return Z
+    except ImportError:
+        logger.info(
+            "Unable to import simsimd, defaulting to NumPy implementation. If you want "
+            "to use simsimd please install with `pip install simsimd`."
+        )
+        X_norm = np.linalg.norm(X, axis=1)
+        Y_norm = np.linalg.norm(Y, axis=1)
+        # Ignore divide by zero errors run time warnings as those are handled below.
+        with np.errstate(divide="ignore", invalid="ignore"):
+            similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm)
+        similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0
+        return similarity
 
 
 def cosine_similarity_top_k(
diff --git a/libs/langchain/tests/unit_tests/test_math_utils.py b/libs/langchain/tests/unit_tests/utils/test_math.py
similarity index 100%
rename from libs/langchain/tests/unit_tests/test_math_utils.py
rename to libs/langchain/tests/unit_tests/utils/test_math.py