openai[patch]: remove numpy dep (#18034)

This commit is contained in:
Erick Friis
2024-02-23 13:12:05 -08:00
committed by GitHub
parent e8be34f8c7
commit a05fb19f42
4 changed files with 204 additions and 115 deletions

View File

@@ -18,7 +18,6 @@ from typing import (
cast,
)
import numpy as np
import openai
import tiktoken
from langchain_core.embeddings import Embeddings
@@ -209,9 +208,11 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
"please use the `AzureOpenAIEmbeddings` class."
)
client_params = {
"api_key": values["openai_api_key"].get_secret_value()
if values["openai_api_key"]
else None,
"api_key": (
values["openai_api_key"].get_secret_value()
if values["openai_api_key"]
else None
),
"organization": values["openai_organization"],
"base_url": values["openai_api_base"],
"timeout": values["request_timeout"],
@@ -346,8 +347,22 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
average_embedded = average_embedded.model_dump()
average = average_embedded["data"][0]["embedding"]
else:
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
embeddings[i] = (average / np.linalg.norm(average)).tolist()
# Per-dimension mean of the rows of _result, weighted by
# num_tokens_in_batch[i]. Pure-Python equivalent of:
# average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
total_weight = sum(num_tokens_in_batch[i])
average = [
sum(
val * weight
for val, weight in zip(embedding, num_tokens_in_batch[i])
)
/ total_weight
for embedding in zip(*_result)
]
# Scale the averaged vector to unit Euclidean length.
# Pure-Python equivalent of:
# embeddings[i] = (average / np.linalg.norm(average)).tolist()
magnitude = sum(val**2 for val in average) ** 0.5
embeddings[i] = [val / magnitude for val in average]
return embeddings
@@ -456,8 +471,21 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
average_embedded = average_embedded.model_dump()
average = average_embedded["data"][0]["embedding"]
else:
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
embeddings[i] = (average / np.linalg.norm(average)).tolist()
# Per-dimension mean of the rows of _result, weighted by
# num_tokens_in_batch[i]. Pure-Python equivalent of:
# average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
total_weight = sum(num_tokens_in_batch[i])
average = [
sum(
val * weight
for val, weight in zip(embedding, num_tokens_in_batch[i])
)
/ total_weight
for embedding in zip(*_result)
]
# Scale the averaged vector to unit Euclidean length.
# Pure-Python equivalent of:
# embeddings[i] = (average / np.linalg.norm(average)).tolist()
magnitude = sum(val**2 for val in average) ** 0.5
embeddings[i] = [val / magnitude for val in average]
return embeddings