From bb3b6bde33dbee0eec935068bc0164e170250cd9 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Tue, 30 Jan 2024 15:49:56 -0800 Subject: [PATCH] openai[minor]: change to secretstr (#16803) --- libs/partners/openai/Makefile | 5 +- .../langchain_openai/chat_models/azure.py | 28 ++++++--- .../langchain_openai/chat_models/base.py | 16 ++--- .../langchain_openai/embeddings/azure.py | 26 +++++--- .../langchain_openai/embeddings/base.py | 25 ++++++-- .../openai/langchain_openai/llms/azure.py | 35 ++++++----- .../openai/langchain_openai/llms/base.py | 22 ++++--- .../embeddings/test_azure.py | 4 +- .../integration_tests/llms/test_azure.py | 2 +- .../openai/tests/unit_tests/test_secrets.py | 62 +++++++++++++++++++ 10 files changed, 162 insertions(+), 63 deletions(-) create mode 100644 libs/partners/openai/tests/unit_tests/test_secrets.py diff --git a/libs/partners/openai/Makefile b/libs/partners/openai/Makefile index e318d45f8b3..31aae646a55 100644 --- a/libs/partners/openai/Makefile +++ b/libs/partners/openai/Makefile @@ -6,10 +6,9 @@ all: help # Define a variable for the test file path. TEST_FILE ?= tests/unit_tests/ -test: - poetry run pytest $(TEST_FILE) +integration_tests: TEST_FILE=tests/integration_tests/ -tests: +test tests integration_tests: poetry run pytest $(TEST_FILE) diff --git a/libs/partners/openai/langchain_openai/chat_models/azure.py b/libs/partners/openai/langchain_openai/chat_models/azure.py index 1584165128f..64b0ebc6ca3 100644 --- a/libs/partners/openai/langchain_openai/chat_models/azure.py +++ b/libs/partners/openai/langchain_openai/chat_models/azure.py @@ -3,12 +3,12 @@ from __future__ import annotations import logging import os -from typing import Any, Callable, Dict, List, Union +from typing import Any, Callable, Dict, List, Optional, Union import openai from langchain_core.outputs import ChatResult -from langchain_core.pydantic_v1 import BaseModel, Field, root_validator -from langchain_core.utils import get_from_dict_or_env +from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator +from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env from langchain_openai.chat_models.base import ChatOpenAI @@ -71,9 +71,9 @@ class AzureChatOpenAI(ChatOpenAI): """ openai_api_version: str = Field(default="", alias="api_version") """Automatically inferred from env var `OPENAI_API_VERSION` if not provided.""" - openai_api_key: Union[str, None] = Field(default=None, alias="api_key") + openai_api_key: Optional[SecretStr] = Field(default=None, alias="api_key") """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided.""" - azure_ad_token: Union[str, None] = None + azure_ad_token: Optional[SecretStr] = None """Your Azure Active Directory token. Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided. @@ -111,11 +111,14 @@ class AzureChatOpenAI(ChatOpenAI): # Check OPENAI_KEY for backwards compatibility. # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using # other forms of azure credentials. 
- values["openai_api_key"] = ( + openai_api_key = ( values["openai_api_key"] or os.getenv("AZURE_OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY") ) + values["openai_api_key"] = ( + convert_to_secret_str(openai_api_key) if openai_api_key else None + ) values["openai_api_base"] = values["openai_api_base"] or os.getenv( "OPENAI_API_BASE" ) @@ -131,8 +134,9 @@ class AzureChatOpenAI(ChatOpenAI): values["azure_endpoint"] = values["azure_endpoint"] or os.getenv( "AZURE_OPENAI_ENDPOINT" ) - values["azure_ad_token"] = values["azure_ad_token"] or os.getenv( - "AZURE_OPENAI_AD_TOKEN" + azure_ad_token = values["azure_ad_token"] or os.getenv("AZURE_OPENAI_AD_TOKEN") + values["azure_ad_token"] = ( + convert_to_secret_str(azure_ad_token) if azure_ad_token else None ) values["openai_api_type"] = get_from_dict_or_env( @@ -168,8 +172,12 @@ class AzureChatOpenAI(ChatOpenAI): "api_version": values["openai_api_version"], "azure_endpoint": values["azure_endpoint"], "azure_deployment": values["deployment_name"], - "api_key": values["openai_api_key"], - "azure_ad_token": values["azure_ad_token"], + "api_key": values["openai_api_key"].get_secret_value() + if values["openai_api_key"] + else None, + "azure_ad_token": values["azure_ad_token"].get_secret_value() + if values["azure_ad_token"] + else None, "azure_ad_token_provider": values["azure_ad_token_provider"], "organization": values["openai_organization"], "base_url": values["openai_api_base"], diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index ab517e75aa7..d0bed6f9210 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -52,10 +52,11 @@ from langchain_core.messages import ( ToolMessageChunk, ) from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult -from langchain_core.pydantic_v1 import BaseModel, Field, root_validator +from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator from langchain_core.runnables import Runnable from langchain_core.tools import BaseTool from langchain_core.utils import ( + convert_to_secret_str, get_from_dict_or_env, get_pydantic_field_names, ) @@ -240,10 +241,7 @@ class ChatOpenAI(BaseChatModel): """What sampling temperature to use.""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) """Holds any model parameters valid for `create` call not explicitly specified.""" - # When updating this to use a SecretStr - # Check for classes that derive from this class (as some of them - # may assume openai_api_key is a str) - openai_api_key: Optional[str] = Field(default=None, alias="api_key") + openai_api_key: Optional[SecretStr] = Field(default=None, alias="api_key") """Automatically inferred from env var `OPENAI_API_KEY` if not provided.""" openai_api_base: Optional[str] = Field(default=None, alias="base_url") """Base URL path for API requests, leave blank if not using a proxy or service @@ -321,8 +319,8 @@ class ChatOpenAI(BaseChatModel): if values["n"] > 1 and values["streaming"]: raise ValueError("n must be 1 when streaming.") - values["openai_api_key"] = get_from_dict_or_env( - values, "openai_api_key", "OPENAI_API_KEY" + values["openai_api_key"] = convert_to_secret_str( + get_from_dict_or_env(values, "openai_api_key", "OPENAI_API_KEY") ) # Check OPENAI_ORGANIZATION for backwards compatibility. 
values["openai_organization"] = ( @@ -341,7 +339,9 @@ class ChatOpenAI(BaseChatModel): ) client_params = { - "api_key": values["openai_api_key"], + "api_key": values["openai_api_key"].get_secret_value() + if values["openai_api_key"] + else None, "organization": values["openai_organization"], "base_url": values["openai_api_base"], "timeout": values["request_timeout"], diff --git a/libs/partners/openai/langchain_openai/embeddings/azure.py b/libs/partners/openai/langchain_openai/embeddings/azure.py index ca0221252be..69288364c05 100644 --- a/libs/partners/openai/langchain_openai/embeddings/azure.py +++ b/libs/partners/openai/langchain_openai/embeddings/azure.py @@ -5,8 +5,8 @@ import os from typing import Callable, Dict, Optional, Union import openai -from langchain_core.pydantic_v1 import Field, root_validator -from langchain_core.utils import get_from_dict_or_env +from langchain_core.pydantic_v1 import Field, SecretStr, root_validator +from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env from langchain_openai.embeddings.base import OpenAIEmbeddings @@ -39,9 +39,9 @@ class AzureOpenAIEmbeddings(OpenAIEmbeddings): If given sets the base client URL to include `/deployments/{azure_deployment}`. Note: this means you won't be able to use non-deployment endpoints. """ - openai_api_key: Union[str, None] = Field(default=None, alias="api_key") + openai_api_key: Optional[SecretStr] = Field(default=None, alias="api_key") """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided.""" - azure_ad_token: Union[str, None] = None + azure_ad_token: Optional[SecretStr] = None """Your Azure Active Directory token. Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided. @@ -64,11 +64,14 @@ class AzureOpenAIEmbeddings(OpenAIEmbeddings): # Check OPENAI_KEY for backwards compatibility. # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using # other forms of azure credentials. 
- values["openai_api_key"] = ( + openai_api_key = ( values["openai_api_key"] or os.getenv("AZURE_OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY") ) + values["openai_api_key"] = ( + convert_to_secret_str(openai_api_key) if openai_api_key else None + ) values["openai_api_base"] = values["openai_api_base"] or os.getenv( "OPENAI_API_BASE" ) @@ -92,8 +95,9 @@ class AzureOpenAIEmbeddings(OpenAIEmbeddings): values["azure_endpoint"] = values["azure_endpoint"] or os.getenv( "AZURE_OPENAI_ENDPOINT" ) - values["azure_ad_token"] = values["azure_ad_token"] or os.getenv( - "AZURE_OPENAI_AD_TOKEN" + azure_ad_token = values["azure_ad_token"] or os.getenv("AZURE_OPENAI_AD_TOKEN") + values["azure_ad_token"] = ( + convert_to_secret_str(azure_ad_token) if azure_ad_token else None ) # Azure OpenAI embedding models allow a maximum of 16 texts # at a time in each batch @@ -122,8 +126,12 @@ class AzureOpenAIEmbeddings(OpenAIEmbeddings): "api_version": values["openai_api_version"], "azure_endpoint": values["azure_endpoint"], "azure_deployment": values["deployment"], - "api_key": values["openai_api_key"], - "azure_ad_token": values["azure_ad_token"], + "api_key": values["openai_api_key"].get_secret_value() + if values["openai_api_key"] + else None, + "azure_ad_token": values["azure_ad_token"].get_secret_value() + if values["azure_ad_token"] + else None, "azure_ad_token_provider": values["azure_ad_token_provider"], "organization": values["openai_organization"], "base_url": values["openai_api_base"], diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py index 0e3c8e3eac5..cd0987ea431 100644 --- a/libs/partners/openai/langchain_openai/embeddings/base.py +++ b/libs/partners/openai/langchain_openai/embeddings/base.py @@ -22,8 +22,18 @@ import numpy as np import openai import tiktoken from langchain_core.embeddings import Embeddings -from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator -from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names +from langchain_core.pydantic_v1 import ( + BaseModel, + Extra, + Field, + SecretStr, + root_validator, +) +from langchain_core.utils import ( + convert_to_secret_str, + get_from_dict_or_env, + get_pydantic_field_names, +) logger = logging.getLogger(__name__) @@ -70,7 +80,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): openai_proxy: Optional[str] = None embedding_ctx_length: int = 8191 """The maximum number of tokens to embed at once.""" - openai_api_key: Optional[str] = Field(default=None, alias="api_key") + openai_api_key: Optional[SecretStr] = Field(default=None, alias="api_key") """Automatically inferred from env var `OPENAI_API_KEY` if not provided.""" openai_organization: Optional[str] = Field(default=None, alias="organization") """Automatically inferred from env var `OPENAI_ORG_ID` if not provided.""" @@ -152,9 +162,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - values["openai_api_key"] = get_from_dict_or_env( + openai_api_key = get_from_dict_or_env( values, "openai_api_key", "OPENAI_API_KEY" ) + values["openai_api_key"] = ( + convert_to_secret_str(openai_api_key) if openai_api_key else None + ) values["openai_api_base"] = values["openai_api_base"] or os.getenv( "OPENAI_API_BASE" ) @@ -196,7 +209,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings): "please use the `AzureOpenAIEmbeddings` class." 
) client_params = { - "api_key": values["openai_api_key"], + "api_key": values["openai_api_key"].get_secret_value() + if values["openai_api_key"] + else None, "organization": values["openai_organization"], "base_url": values["openai_api_base"], "timeout": values["request_timeout"], diff --git a/libs/partners/openai/langchain_openai/llms/azure.py b/libs/partners/openai/langchain_openai/llms/azure.py index d719c609015..f307d37d54b 100644 --- a/libs/partners/openai/langchain_openai/llms/azure.py +++ b/libs/partners/openai/langchain_openai/llms/azure.py @@ -2,18 +2,11 @@ from __future__ import annotations import logging import os -from typing import ( - Any, - Callable, - Dict, - List, - Mapping, - Union, -) +from typing import Any, Callable, Dict, List, Mapping, Optional, Union import openai -from langchain_core.pydantic_v1 import Field, root_validator -from langchain_core.utils import get_from_dict_or_env +from langchain_core.pydantic_v1 import Field, SecretStr, root_validator +from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env from langchain_openai.llms.base import BaseOpenAI @@ -52,9 +45,9 @@ class AzureOpenAI(BaseOpenAI): """ openai_api_version: str = Field(default="", alias="api_version") """Automatically inferred from env var `OPENAI_API_VERSION` if not provided.""" - openai_api_key: Union[str, None] = Field(default=None, alias="api_key") + openai_api_key: Optional[SecretStr] = Field(default=None, alias="api_key") """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided.""" - azure_ad_token: Union[str, None] = None + azure_ad_token: Optional[SecretStr] = None """Your Azure Active Directory token. Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided. @@ -92,17 +85,21 @@ class AzureOpenAI(BaseOpenAI): # Check OPENAI_KEY for backwards compatibility. # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using # other forms of azure credentials. 
- values["openai_api_key"] = ( + openai_api_key = ( values["openai_api_key"] or os.getenv("AZURE_OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY") ) + values["openai_api_key"] = ( + convert_to_secret_str(openai_api_key) if openai_api_key else None + ) values["azure_endpoint"] = values["azure_endpoint"] or os.getenv( "AZURE_OPENAI_ENDPOINT" ) - values["azure_ad_token"] = values["azure_ad_token"] or os.getenv( - "AZURE_OPENAI_AD_TOKEN" + azure_ad_token = values["azure_ad_token"] or os.getenv("AZURE_OPENAI_AD_TOKEN") + values["azure_ad_token"] = ( + convert_to_secret_str(azure_ad_token) if azure_ad_token else None ) values["openai_api_base"] = values["openai_api_base"] or os.getenv( "OPENAI_API_BASE" @@ -150,8 +147,12 @@ class AzureOpenAI(BaseOpenAI): "api_version": values["openai_api_version"], "azure_endpoint": values["azure_endpoint"], "azure_deployment": values["deployment_name"], - "api_key": values["openai_api_key"], - "azure_ad_token": values["azure_ad_token"], + "api_key": values["openai_api_key"].get_secret_value() + if values["openai_api_key"] + else None, + "azure_ad_token": values["azure_ad_token"].get_secret_value() + if values["azure_ad_token"] + else None, "azure_ad_token_provider": values["azure_ad_token_provider"], "organization": values["openai_organization"], "base_url": values["openai_api_base"], diff --git a/libs/partners/openai/langchain_openai/llms/base.py b/libs/partners/openai/langchain_openai/llms/base.py index ef9af629ed0..27c2afd5eef 100644 --- a/libs/partners/openai/langchain_openai/llms/base.py +++ b/libs/partners/openai/langchain_openai/llms/base.py @@ -27,8 +27,12 @@ from langchain_core.callbacks import ( ) from langchain_core.language_models.llms import BaseLLM from langchain_core.outputs import Generation, GenerationChunk, LLMResult -from langchain_core.pydantic_v1 import Field, root_validator -from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names +from langchain_core.pydantic_v1 import Field, SecretStr, root_validator +from langchain_core.utils import ( + convert_to_secret_str, + get_from_dict_or_env, + get_pydantic_field_names, +) from langchain_core.utils.utils import build_extra_kwargs logger = logging.getLogger(__name__) @@ -104,10 +108,7 @@ class BaseOpenAI(BaseLLM): """Generates best_of completions server-side and returns the "best".""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) """Holds any model parameters valid for `create` call not explicitly specified.""" - # When updating this to use a SecretStr - # Check for classes that derive from this class (as some of them - # may assume openai_api_key is a str) - openai_api_key: Optional[str] = Field(default=None, alias="api_key") + openai_api_key: Optional[SecretStr] = Field(default=None, alias="api_key") """Automatically inferred from env var `OPENAI_API_KEY` if not provided.""" openai_api_base: Optional[str] = Field(default=None, alias="base_url") """Base URL path for API requests, leave blank if not using a proxy or service @@ -175,9 +176,12 @@ class BaseOpenAI(BaseLLM): if values["streaming"] and values["best_of"] > 1: raise ValueError("Cannot stream results when best_of > 1.") - values["openai_api_key"] = get_from_dict_or_env( + openai_api_key = get_from_dict_or_env( values, "openai_api_key", "OPENAI_API_KEY" ) + values["openai_api_key"] = ( + convert_to_secret_str(openai_api_key) if openai_api_key else None + ) values["openai_api_base"] = values["openai_api_base"] or os.getenv( "OPENAI_API_BASE" ) @@ -194,7 +198,9 @@ class BaseOpenAI(BaseLLM): ) client_params = 
{ - "api_key": values["openai_api_key"], + "api_key": values["openai_api_key"].get_secret_value() + if values["openai_api_key"] + else None, "organization": values["openai_organization"], "base_url": values["openai_api_base"], "timeout": values["request_timeout"], diff --git a/libs/partners/openai/tests/integration_tests/embeddings/test_azure.py b/libs/partners/openai/tests/integration_tests/embeddings/test_azure.py index e270faf0923..e7ca5228383 100644 --- a/libs/partners/openai/tests/integration_tests/embeddings/test_azure.py +++ b/libs/partners/openai/tests/integration_tests/embeddings/test_azure.py @@ -22,7 +22,7 @@ def _get_embeddings(**kwargs: Any) -> AzureOpenAIEmbeddings: return AzureOpenAIEmbeddings( azure_deployment=DEPLOYMENT_NAME, api_version=OPENAI_API_VERSION, - openai_api_base=OPENAI_API_BASE, + azure_endpoint=OPENAI_API_BASE, openai_api_key=OPENAI_API_KEY, **kwargs, ) @@ -109,7 +109,7 @@ def test_azure_openai_embedding_with_empty_string() -> None: openai.AzureOpenAI( api_version=OPENAI_API_VERSION, api_key=OPENAI_API_KEY, - base_url=embedding.openai_api_base, + azure_endpoint=OPENAI_API_BASE, azure_deployment=DEPLOYMENT_NAME, ) # type: ignore .embeddings.create(input="", model="text-embedding-ada-002") diff --git a/libs/partners/openai/tests/integration_tests/llms/test_azure.py b/libs/partners/openai/tests/integration_tests/llms/test_azure.py index 66486bb2b74..c00e5afbd12 100644 --- a/libs/partners/openai/tests/integration_tests/llms/test_azure.py +++ b/libs/partners/openai/tests/integration_tests/llms/test_azure.py @@ -22,7 +22,7 @@ def _get_llm(**kwargs: Any) -> AzureOpenAI: return AzureOpenAI( deployment_name=DEPLOYMENT_NAME, openai_api_version=OPENAI_API_VERSION, - openai_api_base=OPENAI_API_BASE, + azure_endpoint=OPENAI_API_BASE, openai_api_key=OPENAI_API_KEY, **kwargs, ) diff --git a/libs/partners/openai/tests/unit_tests/test_secrets.py b/libs/partners/openai/tests/unit_tests/test_secrets.py new file mode 100644 index 00000000000..a90fbb4b7db --- /dev/null +++ b/libs/partners/openai/tests/unit_tests/test_secrets.py @@ -0,0 +1,62 @@ +from langchain_openai import ( + AzureChatOpenAI, + AzureOpenAI, + AzureOpenAIEmbeddings, + ChatOpenAI, + OpenAI, + OpenAIEmbeddings, +) + + +def test_chat_openai_secrets() -> None: + o = ChatOpenAI(openai_api_key="foo") + s = str(o) + assert "foo" not in s + + +def test_openai_secrets() -> None: + o = OpenAI(openai_api_key="foo") + s = str(o) + assert "foo" not in s + + +def test_openai_embeddings_secrets() -> None: + o = OpenAIEmbeddings(openai_api_key="foo") + s = str(o) + assert "foo" not in s + + +def test_azure_chat_openai_secrets() -> None: + o = AzureChatOpenAI( + openai_api_key="foo1", + azure_endpoint="endpoint", + azure_ad_token="foo2", + api_version="version", + ) + s = str(o) + assert "foo1" not in s + assert "foo2" not in s + + +def test_azure_openai_secrets() -> None: + o = AzureOpenAI( + openai_api_key="foo1", + azure_endpoint="endpoint", + azure_ad_token="foo2", + api_version="version", + ) + s = str(o) + assert "foo1" not in s + assert "foo2" not in s + + +def test_azure_openai_embeddings_secrets() -> None: + o = AzureOpenAIEmbeddings( + openai_api_key="foo1", + azure_endpoint="endpoint", + azure_ad_token="foo2", + api_version="version", + ) + s = str(o) + assert "foo1" not in s + assert "foo2" not in s
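
The pattern applied in every model class above is the same: store the credential as an Optional[SecretStr], coerce any plain string or environment value through convert_to_secret_str() inside the root validator, and unwrap it with .get_secret_value() only at the point the openai client parameters are built. A minimal sketch of that coercion and masking behavior follows, assuming pydantic v1 as re-exported by langchain_core.pydantic_v1 and the convert_to_secret_str helper imported throughout the diff; the "sk-example" value is a placeholder, not a real key.

    from typing import Optional

    from langchain_core.pydantic_v1 import SecretStr
    from langchain_core.utils import convert_to_secret_str

    # Plain strings are wrapped in SecretStr; values that are already a
    # SecretStr pass through unchanged.
    raw: Optional[str] = "sk-example"
    key: Optional[SecretStr] = convert_to_secret_str(raw) if raw else None

    # str()/repr() mask the value, which is what
    # tests/unit_tests/test_secrets.py asserts for each model class.
    assert key is not None
    assert "sk-example" not in str(key)  # renders as '**********'

    # The raw value is only recovered when constructing the client,
    # mirroring the `client_params` blocks in the patch.
    client_params = {
        "api_key": key.get_secret_value() if key else None,
    }
    assert client_params["api_key"] == "sk-example"

This keeps the credential out of logs and object reprs while leaving the wire-level behavior of the openai client unchanged.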