From 56499cf58b2af71158b18f2dbf3002950332c4f2 Mon Sep 17 00:00:00 2001
From: ccurme
Date: Thu, 21 Nov 2024 13:23:10 -0500
Subject: [PATCH] openai[patch]: unskip test and relax tolerance in embeddings
 comparison (#28262)

From what I can tell, the response from the SDK is not deterministic:

```python
import numpy as np
import openai

documents = ["disallowed special token '<|endoftext|>'"]
model = "text-embedding-ada-002"

# Reference embedding for the input.
direct_output_1 = (
    openai.OpenAI()
    .embeddings.create(input=documents, model=model)
    .data[0]
    .embedding
)

# Embed the same input repeatedly and compare against the reference.
for i in range(10):
    direct_output_2 = (
        openai.OpenAI()
        .embeddings.create(input=documents, model=model)
        .data[0]
        .embedding
    )
    print(f"{i}: {np.isclose(direct_output_1, direct_output_2).all()}")
```
```
0: True
1: True
2: True
3: True
4: False
5: True
6: True
7: True
8: True
9: True
```

See the related discussion here:
https://community.openai.com/t/can-text-embedding-ada-002-be-made-deterministic/318054

I found the same result using `"text-embedding-3-small"`.
---
 .../tests/integration_tests/embeddings/test_azure.py       | 2 +-
 .../openai/tests/integration_tests/embeddings/test_base.py | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/libs/partners/openai/tests/integration_tests/embeddings/test_azure.py b/libs/partners/openai/tests/integration_tests/embeddings/test_azure.py
index 5bf5e7a2480..18644ee66aa 100644
--- a/libs/partners/openai/tests/integration_tests/embeddings/test_azure.py
+++ b/libs/partners/openai/tests/integration_tests/embeddings/test_azure.py
@@ -117,7 +117,7 @@ def test_azure_openai_embedding_with_empty_string() -> None:
         .data[0]
         .embedding
     )
-    assert np.allclose(output[0], expected_output, atol=0.0001)
+    assert np.allclose(output[0], expected_output, atol=0.001)
     assert len(output[1]) == 1536
 
 
diff --git a/libs/partners/openai/tests/integration_tests/embeddings/test_base.py b/libs/partners/openai/tests/integration_tests/embeddings/test_base.py
index ef16dd2f48a..321edcfc0fb 100644
--- a/libs/partners/openai/tests/integration_tests/embeddings/test_base.py
+++ b/libs/partners/openai/tests/integration_tests/embeddings/test_base.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 import openai
-import pytest
 
 from langchain_openai.embeddings.base import OpenAIEmbeddings
 
@@ -33,7 +32,6 @@ def test_langchain_openai_embeddings_dimensions() -> None:
     assert len(output[0]) == 128
 
 
-@pytest.mark.skip(reason="flaky")
 def test_langchain_openai_embeddings_equivalent_to_raw() -> None:
     documents = ["disallowed special token '<|endoftext|>'"]
     embedding = OpenAIEmbeddings()
@@ -45,10 +43,9 @@ def test_langchain_openai_embeddings_equivalent_to_raw() -> None:
         .data[0]
         .embedding
     )
-    assert np.isclose(lc_output, direct_output).all()
+    assert np.allclose(lc_output, direct_output, atol=0.001)
 
 
-@pytest.mark.skip(reason="flaky")
 async def test_langchain_openai_embeddings_equivalent_to_raw_async() -> None:
     documents = ["disallowed special token '<|endoftext|>'"]
     embedding = OpenAIEmbeddings()
@@ -60,7 +57,7 @@ async def test_langchain_openai_embeddings_equivalent_to_raw_async() -> None:
         .data[0]
         .embedding
     )
-    assert np.isclose(lc_output, direct_output).all()
+    assert np.allclose(lc_output, direct_output, atol=0.001)
 
 
 def test_langchain_openai_embeddings_dimensions_large_num() -> None: