Mirror of https://github.com/hwchase17/langchain.git, synced 2025-09-03 03:59:42 +00:00
openai[patch]: remove numpy dep (#18034)
@@ -18,7 +18,6 @@ from typing import (
     cast,
 )
 
-import numpy as np
 import openai
 import tiktoken
 from langchain_core.embeddings import Embeddings
@@ -209,9 +208,11 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                 "please use the `AzureOpenAIEmbeddings` class."
             )
         client_params = {
-            "api_key": values["openai_api_key"].get_secret_value()
-            if values["openai_api_key"]
-            else None,
+            "api_key": (
+                values["openai_api_key"].get_secret_value()
+                if values["openai_api_key"]
+                else None
+            ),
             "organization": values["openai_organization"],
             "base_url": values["openai_api_base"],
             "timeout": values["request_timeout"],
@@ -346,8 +347,22 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                     average_embedded = average_embedded.model_dump()
                 average = average_embedded["data"][0]["embedding"]
             else:
-                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
-            embeddings[i] = (average / np.linalg.norm(average)).tolist()
+                # should be same as
+                # average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
+                total_weight = sum(num_tokens_in_batch[i])
+                average = [
+                    sum(
+                        val * weight
+                        for val, weight in zip(embedding, num_tokens_in_batch[i])
+                    )
+                    / total_weight
+                    for embedding in zip(*_result)
+                ]
+
+            # should be same as
+            # embeddings[i] = (average / np.linalg.norm(average)).tolist()
+            magnitude = sum(val**2 for val in average) ** 0.5
+            embeddings[i] = [val / magnitude for val in average]
 
         return embeddings
 
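For context on the hunk above: a minimal standalone sketch (the `_result` and `num_tokens` values are illustrative, and numpy is imported here only for the cross-check, not by the library code) showing that the pure-Python weighted average and L2 normalization introduced in this diff agree with the `np.average` / `np.linalg.norm` calls they replace:

import math

import numpy as np  # used only to cross-check the equivalence; no longer a runtime dependency

# Illustrative inputs: two chunk embeddings for one text and their token-count weights.
_result = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
num_tokens = [3, 5]

# Pure-Python weighted average, per dimension, as in the new code above.
total_weight = sum(num_tokens)
average = [
    sum(val * weight for val, weight in zip(embedding, num_tokens)) / total_weight
    for embedding in zip(*_result)
]

# Pure-Python L2 normalization, as in the new code above.
magnitude = sum(val**2 for val in average) ** 0.5
normalized = [val / magnitude for val in average]

# The removed numpy implementation, for comparison.
np_average = np.average(_result, axis=0, weights=num_tokens)
np_normalized = (np_average / np.linalg.norm(np_average)).tolist()

assert all(math.isclose(a, b) for a, b in zip(normalized, np_normalized))

Both paths compute sum(weight * value) / sum(weight) for each dimension and then divide by the Euclidean norm, so the results match up to floating-point rounding.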
@@ -456,8 +471,21 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                     average_embedded = average_embedded.model_dump()
                 average = average_embedded["data"][0]["embedding"]
             else:
-                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
-            embeddings[i] = (average / np.linalg.norm(average)).tolist()
+                # should be same as
+                # average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
+                total_weight = sum(num_tokens_in_batch[i])
+                average = [
+                    sum(
+                        val * weight
+                        for val, weight in zip(embedding, num_tokens_in_batch[i])
+                    )
+                    / total_weight
+                    for embedding in zip(*_result)
+                ]
+            # should be same as
+            # embeddings[i] = (average / np.linalg.norm(average)).tolist()
+            magnitude = sum(val**2 for val in average) ** 0.5
+            embeddings[i] = [val / magnitude for val in average]
 
         return embeddings
 