Retryable exception for empty OpenAI embedding. (#7070)

Description:

The OpenAI "embeddings" API intermittently falls into a failure state
where an embedding is returned as [ Nan ], rather than the expected 1536
floats. This patch checks for that state (specifically, for an embedding
of length 1) and if it occurs, throws an ApiError, which will cause the
chunk to be retried.

Issue:

I have been unable to find an official langchain issue for this problem,
but it is discussed (by another user) at
https://stackoverflow.com/questions/76469415/getting-embeddings-of-length-1-from-langchain-openaiembeddings

Maintainer: @dev2049

Testing: 

Since this is an intermittent OpenAI issue, I have not provided a unit
or integration test. The provided code has, though, been run
successfully over several million tokens.

---------

Co-authored-by: William Webber <william@williamwebber.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
wewebber-merlin 2023-07-06 05:23:45 +10:00 committed by GitHub
parent e4459e423b
commit 8a7c95e555
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -86,13 +86,23 @@ def _async_retry_decorator(embeddings: OpenAIEmbeddings) -> Any:
return wrap
# https://stackoverflow.com/questions/76469415/getting-embeddings-of-length-1-from-langchain-openaiembeddings
def _check_response(response: dict) -> dict:
if any(len(d["embedding"]) == 1 for d in response["data"]):
import openai
raise openai.error.APIError("OpenAI API returned an empty embedding")
return response
def embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) -> Any:
"""Use tenacity to retry the embedding call."""
retry_decorator = _create_retry_decorator(embeddings)
@retry_decorator
def _embed_with_retry(**kwargs: Any) -> Any:
return embeddings.client.create(**kwargs)
response = embeddings.client.create(**kwargs)
return _check_response(response)
return _embed_with_retry(**kwargs)
@ -102,7 +112,8 @@ async def async_embed_with_retry(embeddings: OpenAIEmbeddings, **kwargs: Any) ->
@_async_retry_decorator(embeddings)
async def _async_embed_with_retry(**kwargs: Any) -> Any:
return await embeddings.client.acreate(**kwargs)
response = await embeddings.client.acreate(**kwargs)
return _check_response(response)
return await _async_embed_with_retry(**kwargs)