core[patch]: Convert SimSIMD back to NumPy (#19473)

This patch fixes issue #18022 by converting SimSIMD's internal
zero-copy outputs back to NumPy arrays.

I've also noticed that a `dtype=np.float32` conversion is often applied
before the data is passed to SimSIMD. Which numeric types do LangChain
users generally care about? We support `float64`, `float32`, `float16`,
and `int8` for cosine distances, and `float16` seems reasonable for
practically any kind of embedding on any modern piece of hardware, so
we can change that part as well 🤗
This commit is contained in:
Ash Vardanian 2024-03-25 16:36:26 -07:00 committed by GitHub
parent 980658cb47
commit d01bad5169
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 4 additions and 4 deletions

View File

@@ -29,7 +29,7 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
Z = 1 - simd.cdist(X, Y, metric="cosine") Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float): if isinstance(Z, float):
return np.array([Z]) return np.array([Z])
return Z return np.array(Z)
except ImportError: except ImportError:
logger.info( logger.info(
"Unable to import simsimd, defaulting to NumPy implementation. If you want " "Unable to import simsimd, defaulting to NumPy implementation. If you want "

View File

@@ -79,7 +79,7 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
Z = 1 - simd.cdist(X, Y, metric="cosine") Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float): if isinstance(Z, float):
return np.array([Z]) return np.array([Z])
return Z return np.array(Z)
except ImportError: except ImportError:
X_norm = np.linalg.norm(X, axis=1) X_norm = np.linalg.norm(X, axis=1)
Y_norm = np.linalg.norm(Y, axis=1) Y_norm = np.linalg.norm(Y, axis=1)

View File

@@ -38,7 +38,7 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
Z = 1 - simd.cdist(X, Y, metric="cosine") Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float): if isinstance(Z, float):
return np.array([Z]) return np.array([Z])
return Z return np.array(Z)
except ImportError: except ImportError:
logger.info( logger.info(
"Unable to import simsimd, defaulting to NumPy implementation. If you want " "Unable to import simsimd, defaulting to NumPy implementation. If you want "

View File

@@ -69,7 +69,7 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
Z = 1 - simd.cdist(X, Y, metric="cosine") Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float): if isinstance(Z, float):
return np.array([Z]) return np.array([Z])
return Z return np.array(Z)
except ImportError: except ImportError:
X_norm = np.linalg.norm(X, axis=1) X_norm = np.linalg.norm(X, axis=1)
Y_norm = np.linalg.norm(Y, axis=1) Y_norm = np.linalg.norm(Y, axis=1)