diff --git a/libs/langchain/langchain/chat_models/azure_openai.py b/libs/langchain/langchain/chat_models/azure_openai.py index fe52489e10d..f7a2bcd61a3 100644 --- a/libs/langchain/langchain/chat_models/azure_openai.py +++ b/libs/langchain/langchain/chat_models/azure_openai.py @@ -2,12 +2,15 @@ from __future__ import annotations import logging +import os +import warnings from typing import Any, Dict, Union -from langchain.chat_models.openai import ChatOpenAI, _is_openai_v1 +from langchain.chat_models.openai import ChatOpenAI from langchain.pydantic_v1 import BaseModel, Field, root_validator from langchain.schema import ChatResult from langchain.utils import get_from_dict_or_env +from langchain.utils.openai import is_openai_v1 logger = logging.getLogger(__name__) @@ -51,48 +54,82 @@ class AzureChatOpenAI(ChatOpenAI): in, even if not explicitly saved on this class. """ - deployment_name: str = Field(default="", alias="azure_deployment") - model_version: str = "" - openai_api_type: str = "" - openai_api_base: str = Field(default="", alias="azure_endpoint") + azure_endpoint: Union[str, None] = None + """Your Azure endpoint, including the resource. + + Example: `https://example-resource.azure.openai.com/` + """ + deployment_name: Union[str, None] = Field(default=None, alias="azure_deployment") + """A model deployment. + + If given sets the base client URL to include `/deployments/{azure_deployment}`. + Note: this means you won't be able to use non-deployment endpoints. + """ openai_api_version: str = Field(default="", alias="api_version") - openai_api_key: str = Field(default="", alias="api_key") - openai_organization: str = Field(default="", alias="organization") - openai_proxy: str = "" + """Automatically inferred from env var `OPENAI_API_VERSION` if not provided.""" + openai_api_key: Union[str, None] = Field(default=None, alias="api_key") + """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided.""" + azure_ad_token: Union[str, None] = None + """Your Azure Active Directory token. + + Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided. + + For more: + https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id. + """ # noqa: E501 + azure_ad_token_provider: Union[str, None] = None + """A function that returns an Azure Active Directory token. + + Will be invoked on every request. + """ + model_version: str = "" + """Legacy, for openai<1.0.0 support.""" + openai_api_type: str = "" + """Legacy, for openai<1.0.0 support.""" + validate_base_url: bool = True @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - values["openai_api_key"] = get_from_dict_or_env( - values, - "openai_api_key", - "OPENAI_API_KEY", + if values["n"] < 1: + raise ValueError("n must be at least 1.") + if values["n"] > 1 and values["streaming"]: + raise ValueError("n must be 1 when streaming.") + + # Check OPENAI_KEY for backwards compatibility. + # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using + # other forms of azure credentials. + values["openai_api_key"] = ( + values["openai_api_key"] + or os.getenv("AZURE_OPENAI_API_KEY") + or os.getenv("OPENAI_API_KEY") ) - values["openai_api_base"] = get_from_dict_or_env( - values, - "openai_api_base", - "OPENAI_API_BASE", + values["openai_api_base"] = values["openai_api_base"] or os.getenv( + "OPENAI_API_BASE" ) - values["openai_api_version"] = get_from_dict_or_env( - values, - "openai_api_version", - "OPENAI_API_VERSION", + values["openai_api_version"] = values["openai_api_version"] or os.getenv( + "OPENAI_API_VERSION" ) + # Check OPENAI_ORGANIZATION for backwards compatibility. + values["openai_organization"] = ( + values["openai_organization"] + or os.getenv("OPENAI_ORG_ID") + or os.getenv("OPENAI_ORGANIZATION") + ) + values["azure_endpoint"] = values["azure_endpoint"] or os.getenv( + "AZURE_OPENAI_ENDPOINT" + ) + values["azure_ad_token"] = values["azure_ad_token"] or os.getenv( + "AZURE_OPENAI_AD_TOKEN" + ) + values["openai_api_type"] = get_from_dict_or_env( values, "openai_api_type", "OPENAI_API_TYPE", default="azure" ) - values["openai_organization"] = get_from_dict_or_env( - values, - "openai_organization", - "OPENAI_ORGANIZATION", - default="", - ) values["openai_proxy"] = get_from_dict_or_env( - values, - "openai_proxy", - "OPENAI_PROXY", - default="", + values, "openai_proxy", "OPENAI_PROXY", default="" ) + try: import openai @@ -101,37 +138,69 @@ class AzureChatOpenAI(ChatOpenAI): "Could not import openai python package. " "Please install it with `pip install openai`." ) - if _is_openai_v1(): - values["client"] = openai.AzureOpenAI( - azure_endpoint=values["openai_api_base"], - api_key=values["openai_api_key"], - timeout=values["request_timeout"], - max_retries=values["max_retries"], - organization=values["openai_organization"], - api_version=values["openai_api_version"], - azure_deployment=values["deployment_name"], - ).chat.completions + if is_openai_v1(): + # For backwards compatibility. Before openai v1, no distinction was made + # between azure_endpoint and base_url (openai_api_base). + openai_api_base = values["openai_api_base"] + if openai_api_base and values["validate_base_url"]: + if "/openai" not in openai_api_base: + values["openai_api_base"] = ( + values["openai_api_base"].rstrip("/") + "/openai" + ) + warnings.warn( + "As of openai>=1.0.0, Azure endpoints should be specified via " + f"the `azure_endpoint` param not `openai_api_base` " + f"(or alias `base_url`). Updating `openai_api_base` from " + f"{openai_api_base} to {values['openai_api_base']}." + ) + if values["deployment_name"]: + warnings.warn( + "As of openai>=1.0.0, if `deployment_name` (or alias " + "`azure_deployment`) is specified then " + "`openai_api_base` (or alias `base_url`) should not be. " + "Instead use `deployment_name` (or alias `azure_deployment`) " + "and `azure_endpoint`." + ) + if values["deployment_name"] not in values["openai_api_base"]: + warnings.warn( + "As of openai>=1.0.0, if `openai_api_base` " + "(or alias `base_url`) is specified it is expected to be " + "of the form " + "https://example-resource.azure.openai.com/openai/deployments/example-deployment. " # noqa: E501 + f"Updating {openai_api_base} to " + f"{values['openai_api_base']}." + ) + values["openai_api_base"] += ( + "/deployments/" + values["deployment_name"] + ) + values["deployment_name"] = None + client_params = { + "api_version": values["openai_api_version"], + "azure_endpoint": values["azure_endpoint"], + "azure_deployment": values["deployment_name"], + "api_key": values["openai_api_key"], + "azure_ad_token": values["azure_ad_token"], + "azure_ad_token_provider": values["azure_ad_token_provider"], + "organization": values["openai_organization"], + "base_url": values["openai_api_base"], + "timeout": values["request_timeout"], + "max_retries": values["max_retries"], + "default_headers": values["default_headers"], + "default_query": values["default_query"], + "http_client": values["http_client"], + } + values["client"] = openai.AzureOpenAI(**client_params).chat.completions values["async_client"] = openai.AsyncAzureOpenAI( - azure_endpoint=values["openai_api_base"], - api_key=values["openai_api_key"], - timeout=values["request_timeout"], - max_retries=values["max_retries"], - organization=values["openai_organization"], - api_version=values["openai_api_version"], - azure_deployment=values["deployment_name"], + **client_params ).chat.completions else: values["client"] = openai.ChatCompletion - if values["n"] < 1: - raise ValueError("n must be at least 1.") - if values["n"] > 1 and values["streaming"]: - raise ValueError("n must be 1 when streaming.") return values @property def _default_params(self) -> Dict[str, Any]: """Get the default parameters for calling OpenAI API.""" - if _is_openai_v1(): + if is_openai_v1(): return super()._default_params else: return { @@ -147,7 +216,7 @@ class AzureChatOpenAI(ChatOpenAI): @property def _client_params(self) -> Dict[str, Any]: """Get the config params used for the openai client.""" - if _is_openai_v1(): + if is_openai_v1(): return super()._client_params else: return { diff --git a/libs/langchain/langchain/chat_models/openai.py b/libs/langchain/langchain/chat_models/openai.py index d5127b86e8e..86f90b9cad6 100644 --- a/libs/langchain/langchain/chat_models/openai.py +++ b/libs/langchain/langchain/chat_models/openai.py @@ -2,8 +2,8 @@ from __future__ import annotations import logging +import os import sys -from importlib.metadata import version from typing import ( TYPE_CHECKING, Any, @@ -20,8 +20,6 @@ from typing import ( Union, ) -from packaging.version import Version, parse - from langchain.adapters.openai import convert_dict_to_message, convert_message_to_dict from langchain.callbacks.manager import ( AsyncCallbackManagerForLLMRun, @@ -51,6 +49,7 @@ from langchain.utils import ( get_from_dict_or_env, get_pydantic_field_names, ) +from langchain.utils.openai import is_openai_v1 if TYPE_CHECKING: import httpx @@ -98,7 +97,7 @@ async def acompletion_with_retry( **kwargs: Any, ) -> Any: """Use tenacity to retry the async completion call.""" - if _is_openai_v1(): + if is_openai_v1(): return await llm.async_client.create(**kwargs) retry_decorator = _create_retry_decorator(llm, run_manager=run_manager) @@ -140,11 +139,6 @@ def _convert_delta_to_message_chunk( return default_class(content=content) -def _is_openai_v1() -> bool: - _version = parse(version("openai")) - return _version >= Version("1.0.0") - - class ChatOpenAI(BaseChatModel): """`OpenAI` Chat large language models API. @@ -169,13 +163,13 @@ class ChatOpenAI(BaseChatModel): def lc_attributes(self) -> Dict[str, Any]: attributes: Dict[str, Any] = {} - if self.openai_organization != "": + if self.openai_organization: attributes["openai_organization"] = self.openai_organization - if self.openai_api_base != "": + if self.openai_api_base: attributes["openai_api_base"] = self.openai_api_base - if self.openai_proxy != "": + if self.openai_proxy: attributes["openai_proxy"] = self.openai_proxy return attributes @@ -197,10 +191,12 @@ class ChatOpenAI(BaseChatModel): # Check for classes that derive from this class (as some of them # may assume openai_api_key is a str) openai_api_key: Optional[str] = Field(default=None, alias="api_key") - """Base URL path for API requests, - leave blank if not using a proxy or service emulator.""" + """Automatically inferred from env var `OPENAI_API_KEY` if not provided.""" openai_api_base: Optional[str] = Field(default=None, alias="base_url") + """Base URL path for API requests, leave blank if not using a proxy or service + emulator.""" openai_organization: Optional[str] = Field(default=None, alias="organization") + """Automatically inferred from env var `OPENAI_ORG_ID` if not provided.""" # to support explicit proxy for OpenAI openai_proxy: Optional[str] = None request_timeout: Union[float, Tuple[float, float], httpx.Timeout, None] = Field( @@ -225,6 +221,11 @@ class ChatOpenAI(BaseChatModel): when using one of the many model providers that expose an OpenAI-like API but with different models. In those cases, in order to avoid erroring when tiktoken is called, you can specify a model name to use here.""" + default_headers: Union[Mapping[str, str], None] = None + default_query: Union[Mapping[str, object], None] = None + # Configure a custom httpx client. See the + # [httpx documentation](https://www.python-httpx.org/api/#client) for more details. + http_client: Union[httpx.Client, None] = None class Config: """Configuration for this pydantic object.""" @@ -260,20 +261,22 @@ class ChatOpenAI(BaseChatModel): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" + if values["n"] < 1: + raise ValueError("n must be at least 1.") + if values["n"] > 1 and values["streaming"]: + raise ValueError("n must be 1 when streaming.") + values["openai_api_key"] = get_from_dict_or_env( values, "openai_api_key", "OPENAI_API_KEY" ) - values["openai_organization"] = get_from_dict_or_env( - values, - "openai_organization", - "OPENAI_ORGANIZATION", - default="", + # Check OPENAI_ORGANIZATION for backwards compatibility. + values["openai_organization"] = ( + values["openai_organization"] + or os.getenv("OPENAI_ORG_ID") + or os.getenv("OPENAI_ORGANIZATION") ) - values["openai_api_base"] = get_from_dict_or_env( - values, - "openai_api_base", - "OPENAI_API_BASE", - default="", + values["openai_api_base"] = values["openai_api_base"] or os.getenv( + "OPENAI_API_BASE" ) values["openai_proxy"] = get_from_dict_or_env( values, @@ -285,32 +288,28 @@ class ChatOpenAI(BaseChatModel): import openai except ImportError: - raise ValueError( + raise ImportError( "Could not import openai python package. " "Please install it with `pip install openai`." ) - if _is_openai_v1(): - values["client"] = openai.OpenAI( - api_key=values["openai_api_key"], - timeout=values["request_timeout"], - max_retries=values["max_retries"], - organization=values["openai_organization"], - base_url=values["openai_api_base"] or None, - ).chat.completions + if is_openai_v1(): + client_params = { + "api_key": values["openai_api_key"], + "organization": values["openai_organization"], + "base_url": values["openai_api_base"], + "timeout": values["request_timeout"], + "max_retries": values["max_retries"], + "default_headers": values["default_headers"], + "default_query": values["default_query"], + "http_client": values["http_client"], + } + values["client"] = openai.OpenAI(**client_params).chat.completions values["async_client"] = openai.AsyncOpenAI( - api_key=values["openai_api_key"], - timeout=values["request_timeout"], - max_retries=values["max_retries"], - organization=values["openai_organization"], - base_url=values["openai_api_base"] or None, + **client_params ).chat.completions else: values["client"] = openai.ChatCompletion - if values["n"] < 1: - raise ValueError("n must be at least 1.") - if values["n"] > 1 and values["streaming"]: - raise ValueError("n must be 1 when streaming.") return values @property @@ -331,7 +330,7 @@ class ChatOpenAI(BaseChatModel): self, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any ) -> Any: """Use tenacity to retry the completion call.""" - if _is_openai_v1(): + if is_openai_v1(): return self.client.create(**kwargs) retry_decorator = _create_retry_decorator(self, run_manager=run_manager) @@ -510,7 +509,7 @@ class ChatOpenAI(BaseChatModel): openai_creds: Dict[str, Any] = { "model": self.model_name, } - if not _is_openai_v1(): + if not is_openai_v1(): openai_creds.update( { "api_key": self.openai_api_key, diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py index 9098acd5276..8c5520a2894 100644 --- a/libs/langchain/langchain/embeddings/__init__.py +++ b/libs/langchain/langchain/embeddings/__init__.py @@ -19,6 +19,7 @@ from langchain.embeddings.aleph_alpha import ( AlephAlphaSymmetricSemanticEmbedding, ) from langchain.embeddings.awa import AwaEmbeddings +from langchain.embeddings.azure_openai import AzureOpenAIEmbeddings from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint from langchain.embeddings.bedrock import BedrockEmbeddings from langchain.embeddings.cache import CacheBackedEmbeddings @@ -72,6 +73,7 @@ logger = logging.getLogger(__name__) __all__ = [ "OpenAIEmbeddings", + "AzureOpenAIEmbeddings", "CacheBackedEmbeddings", "ClarifaiEmbeddings", "CohereEmbeddings", diff --git a/libs/langchain/langchain/embeddings/azure_openai.py b/libs/langchain/langchain/embeddings/azure_openai.py new file mode 100644 index 00000000000..f9a233e55be --- /dev/null +++ b/libs/langchain/langchain/embeddings/azure_openai.py @@ -0,0 +1,149 @@ +"""Azure OpenAI embeddings wrapper.""" +from __future__ import annotations + +import os +import warnings +from typing import Dict, Optional, Union + +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.pydantic_v1 import Field, root_validator +from langchain.utils import get_from_dict_or_env +from langchain.utils.openai import is_openai_v1 + + +class AzureOpenAIEmbeddings(OpenAIEmbeddings): + """`Azure OpenAI` Embeddings API.""" + + azure_endpoint: Union[str, None] = None + """Your Azure endpoint, including the resource. + + Example: `https://example-resource.azure.openai.com/` + """ + azure_deployment: Optional[str] = None + """A model deployment. + + If given sets the base client URL to include `/deployments/{azure_deployment}`. + Note: this means you won't be able to use non-deployment endpoints. + """ + openai_api_key: Union[str, None] = Field(default=None, alias="api_key") + """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided.""" + azure_ad_token: Union[str, None] = None + """Your Azure Active Directory token. + + Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided. + + For more: + https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id. + """ # noqa: E501 + azure_ad_token_provider: Union[str, None] = None + """A function that returns an Azure Active Directory token. + + Will be invoked on every request. + """ + openai_api_version: Optional[str] = Field(default=None, alias="api_version") + """Automatically inferred from env var `OPENAI_API_VERSION` if not provided.""" + validate_base_url: bool = True + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + # Check OPENAI_KEY for backwards compatibility. + # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using + # other forms of azure credentials. + values["openai_api_key"] = ( + values["openai_api_key"] + or os.getenv("AZURE_OPENAI_API_KEY") + or os.getenv("OPENAI_API_KEY") + ) + values["openai_api_base"] = values["openai_api_base"] or os.getenv( + "OPENAI_API_BASE" + ) + values["openai_api_version"] = values["openai_api_version"] or os.getenv( + "OPENAI_API_VERSION", default="2023-05-15" + ) + values["openai_api_type"] = get_from_dict_or_env( + values, "openai_api_type", "OPENAI_API_TYPE", default="azure" + ) + values["openai_organization"] = ( + values["openai_organization"] + or os.getenv("OPENAI_ORG_ID") + or os.getenv("OPENAI_ORGANIZATION") + ) + values["openai_proxy"] = get_from_dict_or_env( + values, + "openai_proxy", + "OPENAI_PROXY", + default="", + ) + values["azure_endpoint"] = values["azure_endpoint"] or os.getenv( + "AZURE_OPENAI_ENDPOINT" + ) + values["azure_ad_token"] = values["azure_ad_token"] or os.getenv( + "AZURE_OPENAI_AD_TOKEN" + ) + try: + import openai + + except ImportError: + raise ImportError( + "Could not import openai python package. " + "Please install it with `pip install openai`." + ) + if is_openai_v1(): + # For backwards compatibility. Before openai v1, no distinction was made + # between azure_endpoint and base_url (openai_api_base). + openai_api_base = values["openai_api_base"] + if openai_api_base and values["validate_base_url"]: + if "/openai" not in openai_api_base: + values["openai_api_base"] += "/openai" + warnings.warn( + "As of openai>=1.0.0, Azure endpoints should be specified via " + f"the `azure_endpoint` param not `openai_api_base` " + f"(or alias `base_url`). Updating `openai_api_base` from " + f"{openai_api_base} to {values['openai_api_base']}." + ) + if values["azure_deployment"]: + warnings.warn( + "As of openai>=1.0.0, if `azure_deployment` (or alias " + "`azure_deployment`) is specified then " + "`openai_api_base` (or alias `base_url`) should not be. " + "Instead use `azure_deployment` (or alias `azure_deployment`) " + "and `azure_endpoint`." + ) + if values["azure_deployment"] not in values["openai_api_base"]: + warnings.warn( + "As of openai>=1.0.0, if `openai_api_base` " + "(or alias `base_url`) is specified it is expected to be " + "of the form " + "https://example-resource.azure.openai.com/openai/deployments/example-deployment. " # noqa: E501 + f"Updating {openai_api_base} to " + f"{values['openai_api_base']}." + ) + values["openai_api_base"] += ( + "/deployments/" + values["azure_deployment"] + ) + values["azure_deployment"] = None + client_params = { + "api_version": values["openai_api_version"], + "azure_endpoint": values["azure_endpoint"], + "azure_deployment": values["azure_deployment"], + "api_key": values["openai_api_key"], + "azure_ad_token": values["azure_ad_token"], + "azure_ad_token_provider": values["azure_ad_token_provider"], + "organization": values["openai_organization"], + "base_url": values["openai_api_base"], + "timeout": values["request_timeout"], + "max_retries": values["max_retries"], + "default_headers": values["default_headers"], + "default_query": values["default_query"], + "http_client": values["http_client"], + } + values["client"] = openai.AzureOpenAI(**client_params).embeddings + values["async_client"] = openai.AsyncAzureOpenAI(**client_params).embeddings + else: + values["client"] = openai.Embedding + return values + + @property + def _llm_type(self) -> str: + return "azure-openai-chat" diff --git a/libs/langchain/langchain/embeddings/openai.py b/libs/langchain/langchain/embeddings/openai.py index 8ffead31989..be36aa25f96 100644 --- a/libs/langchain/langchain/embeddings/openai.py +++ b/libs/langchain/langchain/embeddings/openai.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import os import warnings from importlib.metadata import version from typing import ( @@ -10,6 +11,7 @@ from typing import ( Dict, List, Literal, + Mapping, Optional, Sequence, Set, @@ -157,6 +159,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): .. code-block:: python import os + os.environ["OPENAI_API_TYPE"] = "azure" os.environ["OPENAI_API_BASE"] = "https:// Dict[str, Any]: @@ -250,17 +266,14 @@ class OpenAIEmbeddings(BaseModel, Embeddings): values["model_kwargs"] = extra return values - @root_validator(pre=True) + @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" values["openai_api_key"] = get_from_dict_or_env( values, "openai_api_key", "OPENAI_API_KEY" ) - values["openai_api_base"] = get_from_dict_or_env( - values, - "openai_api_base", - "OPENAI_API_BASE", - default="", + values["openai_api_base"] = values["openai_api_base"] or os.getenv( + "OPENAI_API_BASE" ) values["openai_api_type"] = get_from_dict_or_env( values, @@ -275,61 +288,61 @@ class OpenAIEmbeddings(BaseModel, Embeddings): default="", ) if values["openai_api_type"] in ("azure", "azure_ad", "azuread"): - default_api_version = "2022-12-01" + default_api_version = "2023-05-15" # Azure OpenAI embedding models allow a maximum of 16 texts # at a time in each batch # See: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings - default_chunk_size = 16 + values["chunk_size"] = max(values["chunk_size"], 16) else: default_api_version = "" - default_chunk_size = 1000 values["openai_api_version"] = get_from_dict_or_env( values, "openai_api_version", "OPENAI_API_VERSION", default=default_api_version, ) - values["openai_organization"] = get_from_dict_or_env( - values, - "openai_organization", - "OPENAI_ORGANIZATION", - default="", + # Check OPENAI_ORGANIZATION for backwards compatibility. + values["openai_organization"] = ( + values["openai_organization"] + or os.getenv("OPENAI_ORG_ID") + or os.getenv("OPENAI_ORGANIZATION") ) - if "chunk_size" not in values: - values["chunk_size"] = default_chunk_size try: import openai - - if _is_openai_v1(): - values["client"] = openai.OpenAI( - api_key=values.get("openai_api_key"), - timeout=values.get("request_timeout"), - max_retries=values.get("max_retries"), - organization=values.get("openai_organization"), - base_url=values.get("openai_api_base") or None, - ).embeddings - values["async_client"] = openai.AsyncOpenAI( - api_key=values.get("openai_api_key"), - timeout=values.get("request_timeout"), - max_retries=values.get("max_retries"), - organization=values.get("openai_organization"), - base_url=values.get("openai_api_base") or None, - ).embeddings - else: - values["client"] = openai.Embedding except ImportError: raise ImportError( "Could not import openai python package. " "Please install it with `pip install openai`." ) + else: + if _is_openai_v1(): + if values["openai_api_type"] in ("azure", "azure_ad", "azuread"): + warnings.warn( + "If you have openai>=1.0.0 installed and are using Azure, " + "please use the `AzureOpenAIEmbeddings` class." + ) + client_params = { + "api_key": values["openai_api_key"], + "organization": values["openai_organization"], + "base_url": values["openai_api_base"], + "timeout": values["request_timeout"], + "max_retries": values["max_retries"], + "default_headers": values["default_headers"], + "default_query": values["default_query"], + "http_client": values["http_client"], + } + values["client"] = openai.OpenAI(**client_params).embeddings + values["async_client"] = openai.AsyncOpenAI(**client_params).embeddings + else: + values["client"] = openai.Embedding return values @property def _invocation_params(self) -> Dict[str, Any]: - openai_args: Dict[str, Any] = ( - {"model": self.model, **self.model_kwargs} - if _is_openai_v1() - else { + if _is_openai_v1(): + openai_args: Dict = {"model": self.model, **self.model_kwargs} + else: + openai_args = { "model": self.model, "request_timeout": self.request_timeout, "headers": self.headers, @@ -340,22 +353,22 @@ class OpenAIEmbeddings(BaseModel, Embeddings): "api_version": self.openai_api_version, **self.model_kwargs, } - ) - if self.openai_api_type in ("azure", "azure_ad", "azuread"): - openai_args["engine"] = self.deployment - if self.openai_proxy: - try: - import openai - except ImportError: - raise ImportError( - "Could not import openai python package. " - "Please install it with `pip install openai`." - ) + if self.openai_api_type in ("azure", "azure_ad", "azuread"): + openai_args["engine"] = self.deployment + # TODO: Look into proxy with openai v1. + if self.openai_proxy: + try: + import openai + except ImportError: + raise ImportError( + "Could not import openai python package. " + "Please install it with `pip install openai`." + ) - openai.proxy = { - "http": self.openai_proxy, - "https": self.openai_proxy, - } # type: ignore[assignment] # noqa: E501 + openai.proxy = { + "http": self.openai_proxy, + "https": self.openai_proxy, + } # type: ignore[assignment] # noqa: E501 return openai_args # please refer to diff --git a/libs/langchain/langchain/utils/openai.py b/libs/langchain/langchain/utils/openai.py new file mode 100644 index 00000000000..6f0c4cfe40c --- /dev/null +++ b/libs/langchain/langchain/utils/openai.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from importlib.metadata import version + +from packaging.version import Version, parse + + +def is_openai_v1() -> bool: + _version = parse(version("openai")) + return _version >= Version("1.0.0") diff --git a/libs/langchain/tests/integration_tests/embeddings/test_azure_openai.py b/libs/langchain/tests/integration_tests/embeddings/test_azure_openai.py new file mode 100644 index 00000000000..dc15a9763e2 --- /dev/null +++ b/libs/langchain/tests/integration_tests/embeddings/test_azure_openai.py @@ -0,0 +1,93 @@ +"""Test openai embeddings.""" +import os +from typing import Any + +import numpy as np +import pytest + +from langchain.embeddings import AzureOpenAIEmbeddings + + +def _get_embeddings(**kwargs: Any) -> AzureOpenAIEmbeddings: + return AzureOpenAIEmbeddings( + openai_api_version=os.environ.get("AZURE_OPENAI_API_VERSION", ""), + **kwargs, + ) + + +def test_azure_openai_embedding_documents() -> None: + """Test openai embeddings.""" + documents = ["foo bar"] + embedding = _get_embeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 1536 + + +def test_azure_openai_embedding_documents_multiple() -> None: + """Test openai embeddings.""" + documents = ["foo bar", "bar foo", "foo"] + embedding = _get_embeddings(chunk_size=2) + embedding.embedding_ctx_length = 8191 + output = embedding.embed_documents(documents) + assert len(output) == 3 + assert len(output[0]) == 1536 + assert len(output[1]) == 1536 + assert len(output[2]) == 1536 + + +@pytest.mark.asyncio +async def test_azure_openai_embedding_documents_async_multiple() -> None: + """Test openai embeddings.""" + documents = ["foo bar", "bar foo", "foo"] + embedding = _get_embeddings(chunk_size=2) + embedding.embedding_ctx_length = 8191 + output = await embedding.aembed_documents(documents) + assert len(output) == 3 + assert len(output[0]) == 1536 + assert len(output[1]) == 1536 + assert len(output[2]) == 1536 + + +def test_azure_openai_embedding_query() -> None: + """Test openai embeddings.""" + document = "foo bar" + embedding = _get_embeddings() + output = embedding.embed_query(document) + assert len(output) == 1536 + + +@pytest.mark.asyncio +async def test_azure_openai_embedding_async_query() -> None: + """Test openai embeddings.""" + document = "foo bar" + embedding = _get_embeddings() + output = await embedding.aembed_query(document) + assert len(output) == 1536 + + +@pytest.mark.skip(reason="Unblock scheduled testing. TODO: fix.") +def test_azure_openai_embedding_with_empty_string() -> None: + """Test openai embeddings with empty string.""" + import openai + + document = ["", "abc"] + embedding = _get_embeddings() + output = embedding.embed_documents(document) + assert len(output) == 2 + assert len(output[0]) == 1536 + expected_output = openai.Embedding.create(input="", model="text-embedding-ada-002")[ + "data" + ][0]["embedding"] + assert np.allclose(output[0], expected_output) + assert len(output[1]) == 1536 + + +def test_embed_documents_normalized() -> None: + output = _get_embeddings().embed_documents(["foo walked to the market"]) + assert np.isclose(np.linalg.norm(output[0]), 1.0) + + +def test_embed_query_normalized() -> None: + output = _get_embeddings().embed_query("foo walked to the market") + assert np.isclose(np.linalg.norm(output), 1.0) diff --git a/libs/langchain/tests/unit_tests/embeddings/test_imports.py b/libs/langchain/tests/unit_tests/embeddings/test_imports.py index dd4f1aebe84..68c8718b26b 100644 --- a/libs/langchain/tests/unit_tests/embeddings/test_imports.py +++ b/libs/langchain/tests/unit_tests/embeddings/test_imports.py @@ -2,6 +2,7 @@ from langchain.embeddings import __all__ EXPECTED_ALL = [ "OpenAIEmbeddings", + "AzureOpenAIEmbeddings", "CacheBackedEmbeddings", "ClarifaiEmbeddings", "CohereEmbeddings",