feat: interfaces for async embeddings, implement async openai (#6563)

Since it seems like #6111 will be blocked for a bit, I've forked
@tyree731's fork and implemented the requested changes.

This change adds two methods to the base Embeddings class,
aembed_query and aembed_documents, which are the async equivalents of
embed_query and embed_documents respectively. This ever so slightly
rounds out async support within langchain, with an initial
implementation provided for openai.

Implements https://github.com/hwchase17/langchain/issues/6109

---------

Co-authored-by: Stephen Tyree <tyree731@gmail.com>
This commit is contained in:
Brendan Graham
2023-06-21 23:16:33 -07:00
committed by GitHub
parent ca24dc2d5f
commit d718f3b6d0
3 changed files with 185 additions and 0 deletions

View File

@@ -1,6 +1,7 @@
"""Test openai embeddings."""
import numpy as np
import openai
import pytest
from langchain.embeddings.openai import OpenAIEmbeddings
@@ -26,6 +27,19 @@ def test_openai_embedding_documents_multiple() -> None:
assert len(output[2]) == 1536
@pytest.mark.asyncio
async def test_openai_embedding_documents_async_multiple() -> None:
    """Check the async document path returns one 1536-long vector per input.

    Three documents are embedded with ``chunk_size=2``.
    NOTE(review): presumably chunk_size < number of documents is meant to
    force more than one request batch -- confirm against OpenAIEmbeddings.
    """
    texts = ["foo bar", "bar foo", "foo"]
    embedder = OpenAIEmbeddings(chunk_size=2)
    embedder.embedding_ctx_length = 8191
    vectors = await embedder.aembed_documents(texts)
    # One embedding is expected for each of the three inputs.
    assert len(vectors) == 3
    for vector in vectors:
        assert len(vector) == 1536
def test_openai_embedding_query() -> None:
"""Test openai embeddings."""
document = "foo bar"
@@ -34,6 +48,15 @@ def test_openai_embedding_query() -> None:
assert len(output) == 1536
@pytest.mark.asyncio
async def test_openai_embedding_async_query() -> None:
    """Check the async query path returns a single 1536-long vector."""
    query_text = "foo bar"
    vector = await OpenAIEmbeddings().aembed_query(query_text)
    assert len(vector) == 1536
def test_openai_embedding_with_empty_string() -> None:
"""Test openai embeddings with empty string."""
document = ["", "abc"]