Mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-27 08:58:48 +00:00)
fix(embeddings): number of texts in Azure OpenAIEmbeddings batch (#10707)
This PR addresses a limitation of Azure OpenAI embeddings, which can handle at most 16 texts per batch. The issue can be worked around by setting `chunk_size=16`, but I'd love to have this automated so the user isn't forced to figure out where the problem comes from and how to solve it. Closes #4575. @baskaryan

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in: parent 7395c28455, commit f0198354d9
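For context, here is a minimal sketch of the manual workaround mentioned in the description, assuming the `langchain.embeddings.OpenAIEmbeddings` class as of this commit (pre-1.0 `openai` SDK); the endpoint, deployment name, and key are placeholders, not real values:

```python
from langchain.embeddings import OpenAIEmbeddings

# Manual workaround before this fix: cap each embedding request at 16 texts yourself.
# All Azure-specific values below are placeholders for illustration only.
embeddings = OpenAIEmbeddings(
    openai_api_type="azure",
    openai_api_base="https://my-resource.openai.azure.com/",  # placeholder endpoint
    openai_api_version="2022-12-01",
    openai_api_key="PLACEHOLDER-KEY",
    deployment="my-embedding-deployment",  # placeholder Azure deployment name
    chunk_size=16,  # Azure OpenAI embeddings accept at most 16 texts per batch
)

vectors = embeddings.embed_documents(["first document", "second document"])
```

With this PR merged, passing `chunk_size=16` explicitly should no longer be necessary when `openai_api_type` is one of `azure`, `azure_ad`, or `azuread`, since the validator now defaults it to 16.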
@@ -231,7 +231,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
         values["model_kwargs"] = extra
         return values
 
-    @root_validator()
+    @root_validator(pre=True)
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that api key and python package exists in environment."""
         values["openai_api_key"] = get_from_dict_or_env(
@@ -257,8 +257,13 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
         )
         if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
             default_api_version = "2022-12-01"
+            # Azure OpenAI embedding models allow a maximum of 16 texts
+            # at a time in each batch
+            # See: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
+            default_chunk_size = 16
         else:
             default_api_version = ""
+            default_chunk_size = 1000
         values["openai_api_version"] = get_from_dict_or_env(
             values,
             "openai_api_version",
@@ -271,6 +276,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
             "OPENAI_ORGANIZATION",
             default="",
         )
+        if "chunk_size" not in values:
+            values["chunk_size"] = default_chunk_size
         try:
             import openai
 
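To make the new defaulting behavior concrete, here is a small self-contained sketch (not the library code itself; `pick_defaults` is a hypothetical helper used only for illustration) that mirrors the logic added above:

```python
# Illustrative re-statement of the defaulting logic added in this commit.
# The real logic lives inside OpenAIEmbeddings.validate_environment.

def pick_defaults(openai_api_type: str) -> dict:
    """Return the default API version and chunk size for a given API type."""
    if openai_api_type in ("azure", "azure_ad", "azuread"):
        # Azure OpenAI embedding endpoints accept at most 16 texts per request.
        return {"openai_api_version": "2022-12-01", "chunk_size": 16}
    return {"openai_api_version": "", "chunk_size": 1000}

assert pick_defaults("azure")["chunk_size"] == 16
assert pick_defaults("open_ai")["chunk_size"] == 1000
```

In the actual validator these are only fallbacks: an explicitly supplied `chunk_size`, or an API version provided via argument or the `OPENAI_API_VERSION` environment variable, still takes precedence. The switch to `@root_validator(pre=True)` appears to be what makes the `"chunk_size" not in values` check work, since a pre-validator sees only the keyword arguments the caller actually passed, before pydantic fills in field defaults.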