Mirror of https://github.com/hwchase17/langchain.git, synced 2025-08-13 14:50:00 +00:00
openai[patch]: unskip test and relax tolerance in embeddings comparison (#28262)
From what I can tell, responses from the SDK are not deterministic:

```python
import numpy as np
import openai

documents = ["disallowed special token '<|endoftext|>'"]
model = "text-embedding-ada-002"

direct_output_1 = (
    openai.OpenAI()
    .embeddings.create(input=documents, model=model)
    .data[0]
    .embedding
)

for i in range(10):
    direct_output_2 = (
        openai.OpenAI()
        .embeddings.create(input=documents, model=model)
        .data[0]
        .embedding
    )
    print(f"{i}: {np.isclose(direct_output_1, direct_output_2).all()}")
```

```
0: True
1: True
2: True
3: True
4: False
5: True
6: True
7: True
8: True
9: True
```

See the related discussion here: https://community.openai.com/t/can-text-embedding-ada-002-be-made-deterministic/318054

The same result shows up with `"text-embedding-3-small"`.
Commit 56499cf58b (parent f5f53d1101)
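For context on why the tolerance was relaxed: `np.isclose(a, b).all()` uses NumPy's default tolerances (`rtol=1e-05`, `atol=1e-08`), which is far tighter than the run-to-run jitter shown above. A minimal sketch with synthetic unit-norm vectors (the jitter magnitude here is an assumption, chosen only to illustrate the failure mode):

```python
import numpy as np

# Two "embeddings" that differ by roughly 1e-4 per component, mimicking
# small run-to-run variation in API responses (magnitude is illustrative).
rng = np.random.default_rng(0)
a = rng.normal(size=1536)
a /= np.linalg.norm(a)  # unit-norm, so typical components are ~1e-2
b = a + rng.normal(scale=1e-4, size=1536)

# Old check: NumPy defaults (rtol=1e-05, atol=1e-08) flag the jitter.
print(np.isclose(a, b).all())         # False

# New check: an absolute tolerance of 1e-3 absorbs it.
print(np.allclose(a, b, atol=0.001))  # True
```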
@@ -117,7 +117,7 @@ def test_azure_openai_embedding_with_empty_string() -> None:
         .data[0]
         .embedding
     )
-    assert np.allclose(output[0], expected_output, atol=0.0001)
+    assert np.allclose(output[0], expected_output, atol=0.001)
     assert len(output[1]) == 1536
@@ -2,7 +2,6 @@

 import numpy as np
 import openai
-import pytest

 from langchain_openai.embeddings.base import OpenAIEmbeddings

@@ -33,7 +32,6 @@ def test_langchain_openai_embeddings_dimensions() -> None:
     assert len(output[0]) == 128


-@pytest.mark.skip(reason="flaky")
 def test_langchain_openai_embeddings_equivalent_to_raw() -> None:
     documents = ["disallowed special token '<|endoftext|>'"]
     embedding = OpenAIEmbeddings()
@@ -45,10 +43,9 @@ def test_langchain_openai_embeddings_equivalent_to_raw() -> None:
         .data[0]
         .embedding
     )
-    assert np.isclose(lc_output, direct_output).all()
+    assert np.allclose(lc_output, direct_output, atol=0.001)


-@pytest.mark.skip(reason="flaky")
 async def test_langchain_openai_embeddings_equivalent_to_raw_async() -> None:
     documents = ["disallowed special token '<|endoftext|>'"]
     embedding = OpenAIEmbeddings()
@@ -60,7 +57,7 @@ async def test_langchain_openai_embeddings_equivalent_to_raw_async() -> None:
         .data[0]
         .embedding
     )
-    assert np.isclose(lc_output, direct_output).all()
+    assert np.allclose(lc_output, direct_output, atol=0.001)


 def test_langchain_openai_embeddings_dimensions_large_num() -> None:
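Taken together, the unskipped tests now boil down to the comparison below. This is a sketch rather than the test file verbatim; it assumes a valid `OPENAI_API_KEY` in the environment and live network access.

```python
import numpy as np
import openai

from langchain_openai.embeddings.base import OpenAIEmbeddings

documents = ["disallowed special token '<|endoftext|>'"]
model = "text-embedding-ada-002"

# Embed through LangChain's wrapper...
lc_output = OpenAIEmbeddings(model=model).embed_documents(documents)[0]

# ...and through the raw OpenAI SDK.
direct_output = (
    openai.OpenAI()
    .embeddings.create(input=documents, model=model)
    .data[0]
    .embedding
)

# The two paths should agree up to the API's own nondeterminism,
# hence an absolute tolerance of 1e-3 rather than exact equality.
assert np.allclose(lc_output, direct_output, atol=0.001)
```

Note the trade-off: if the API's nondeterminism ever grows beyond `atol=0.001`, the test will flake again; the relaxed tolerance is a pragmatic bound, not a guarantee.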