mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 00:23:25 +00:00
openai[patch]: run _tokenize in background thread in async embedding invocations (#31312)
This commit is contained in:
parent
f16456139b
commit
1b5ffe4107
@@ -8,6 +8,7 @@ from typing import Any, Literal, Optional, Union, cast
|
|||||||
import openai
|
import openai
|
||||||
import tiktoken
|
import tiktoken
|
||||||
from langchain_core.embeddings import Embeddings
|
from langchain_core.embeddings import Embeddings
|
||||||
|
from langchain_core.runnables.config import run_in_executor
|
||||||
from langchain_core.utils import from_env, get_pydantic_field_names, secret_from_env
|
from langchain_core.utils import from_env, get_pydantic_field_names, secret_from_env
|
||||||
from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
|
from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
|
||||||
from typing_extensions import Self
|
from typing_extensions import Self
|
||||||
@@ -525,7 +526,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|||||||
|
|
||||||
_chunk_size = chunk_size or self.chunk_size
|
_chunk_size = chunk_size or self.chunk_size
|
||||||
client_kwargs = {**self._invocation_params, **kwargs}
|
client_kwargs = {**self._invocation_params, **kwargs}
|
||||||
_iter, tokens, indices = self._tokenize(texts, _chunk_size)
|
_iter, tokens, indices = await run_in_executor(
|
||||||
|
None, self._tokenize, texts, _chunk_size
|
||||||
|
)
|
||||||
batched_embeddings: list[list[float]] = []
|
batched_embeddings: list[list[float]] = []
|
||||||
for i in range(0, len(tokens), _chunk_size):
|
for i in range(0, len(tokens), _chunk_size):
|
||||||
response = await self.async_client.create(
|
response = await self.async_client.create(
|
||||||
|
Loading…
Reference in New Issue
Block a user