diff --git a/libs/langchain/langchain/chat_models/openai.py b/libs/langchain/langchain/chat_models/openai.py
index 5ab3e9d7da9..6793d0fa49f 100644
--- a/libs/langchain/langchain/chat_models/openai.py
+++ b/libs/langchain/langchain/chat_models/openai.py
@@ -327,6 +327,8 @@ class ChatOpenAI(BaseChatModel):
         }
         if self.max_tokens is not None:
             params["max_tokens"] = self.max_tokens
+        if self.request_timeout is not None and not is_openai_v1():
+            params["request_timeout"] = self.request_timeout
         return params
 
     def completion_with_retry(
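For context on the gate above: the pre-1.0 `openai` SDK accepted `request_timeout` as a per-call keyword, while the 1.x SDK takes `timeout` on the client object, so passing `request_timeout` through the params dict would be rejected under v1. A minimal sketch of the two call styles, assuming `OPENAI_API_KEY` is set; the model name and prompt are illustrative:

```python
import openai

from langchain.utils.openai import is_openai_v1

if is_openai_v1():
    # openai>=1.0: the timeout is configured on the client, not per request.
    client = openai.OpenAI(timeout=60.0)
    client.completions.create(model="gpt-3.5-turbo-instruct", prompt="Say foo:")
else:
    # openai<1.0: each call accepts a request_timeout keyword.
    openai.Completion.create(
        model="gpt-3.5-turbo-instruct", prompt="Say foo:", request_timeout=60.0
    )
```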
diff --git a/libs/langchain/langchain/llms/openai.py b/libs/langchain/langchain/llms/openai.py
index 7ff51f442cf..6ca186b3dd8 100644
--- a/libs/langchain/langchain/llms/openai.py
+++ b/libs/langchain/langchain/llms/openai.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import logging
+import os
 import sys
 import warnings
 from typing import (
@@ -29,6 +30,7 @@ from langchain.pydantic_v1 import Field, root_validator
 from langchain.schema import Generation, LLMResult
 from langchain.schema.output import GenerationChunk
 from langchain.utils import get_from_dict_or_env, get_pydantic_field_names
+from langchain.utils.openai import is_openai_v1
 from langchain.utils.utils import build_extra_kwargs
 
 logger = logging.getLogger(__name__)
@@ -106,6 +108,9 @@ def completion_with_retry(
     **kwargs: Any,
 ) -> Any:
     """Use tenacity to retry the completion call."""
+    if is_openai_v1():
+        return llm.client.create(**kwargs)
+
     retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
 
     @retry_decorator
@@ -121,6 +126,9 @@ async def acompletion_with_retry(
     **kwargs: Any,
 ) -> Any:
     """Use tenacity to retry the async completion call."""
+    if is_openai_v1():
+        return await llm.async_client.create(**kwargs)
+
     retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
 
     @retry_decorator
@@ -141,13 +149,13 @@ class BaseOpenAI(BaseLLM):
     @property
     def lc_attributes(self) -> Dict[str, Any]:
         attributes: Dict[str, Any] = {}
-        if self.openai_api_base != "":
+        if self.openai_api_base:
             attributes["openai_api_base"] = self.openai_api_base
 
-        if self.openai_organization != "":
+        if self.openai_organization:
             attributes["openai_organization"] = self.openai_organization
 
-        if self.openai_proxy != "":
+        if self.openai_proxy:
             attributes["openai_proxy"] = self.openai_proxy
 
         return attributes
@@ -157,6 +165,7 @@ class BaseOpenAI(BaseLLM):
         return True
 
     client: Any = None  #: :meta private:
+    async_client: Any = None  #: :meta private:
     model_name: str = Field(default="text-davinci-003", alias="model")
     """Model name to use."""
     temperature: float = 0.7
@@ -177,18 +186,28 @@ class BaseOpenAI(BaseLLM):
     """Generates best_of completions server-side and returns the "best"."""
     model_kwargs: Dict[str, Any] = Field(default_factory=dict)
     """Holds any model parameters valid for `create` call not explicitly specified."""
-    openai_api_key: Optional[str] = None
-    openai_api_base: Optional[str] = None
-    openai_organization: Optional[str] = None
+    # When updating this to use a SecretStr
+    # Check for classes that derive from this class (as some of them
+    # may assume openai_api_key is a str)
+    openai_api_key: Optional[str] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
+    openai_api_base: Optional[str] = Field(default=None, alias="base_url")
+    """Base URL path for API requests, leave blank if not using a proxy or service
+    emulator."""
+    openai_organization: Optional[str] = Field(default=None, alias="organization")
+    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
     # to support explicit proxy for OpenAI
     openai_proxy: Optional[str] = None
     batch_size: int = 20
     """Batch size to use when passing multiple documents to generate."""
-    request_timeout: Optional[Union[float, Tuple[float, float]]] = None
-    """Timeout for requests to OpenAI completion API. Default is 600 seconds."""
+    request_timeout: Union[float, Tuple[float, float], Any, None] = Field(
+        default=None, alias="timeout"
+    )
+    """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or
+    None."""
     logit_bias: Optional[Dict[str, float]] = Field(default_factory=dict)
     """Adjust the probability of specific tokens being generated."""
-    max_retries: int = 6
+    max_retries: int = 2
     """Maximum number of retries to make when generating."""
     streaming: bool = False
     """Whether to stream the results or not."""
@@ -206,6 +225,12 @@ class BaseOpenAI(BaseLLM):
     when using one of the many model providers that expose an OpenAI-like
     API but with different models. In those cases, in order to avoid erroring
     when tiktoken is called, you can specify a model name to use here."""
+    default_headers: Union[Mapping[str, str], None] = None
+    default_query: Union[Mapping[str, object], None] = None
+    # Configure a custom httpx client. See the
+    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+    http_client: Union[Any, None] = None
+    """Optional httpx.Client."""
 
     def __new__(cls, **data: Any) -> Union[OpenAIChat, BaseOpenAI]:  # type: ignore
         """Initialize the OpenAI object."""
@@ -239,14 +264,18 @@ class BaseOpenAI(BaseLLM):
     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that api key and python package exists in environment."""
+        if values["n"] < 1:
+            raise ValueError("n must be at least 1.")
+        if values["streaming"] and values["n"] > 1:
+            raise ValueError("Cannot stream results when n > 1.")
+        if values["streaming"] and values["best_of"] > 1:
+            raise ValueError("Cannot stream results when best_of > 1.")
+
         values["openai_api_key"] = get_from_dict_or_env(
             values, "openai_api_key", "OPENAI_API_KEY"
         )
-        values["openai_api_base"] = get_from_dict_or_env(
-            values,
-            "openai_api_base",
-            "OPENAI_API_BASE",
-            default="",
+        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
+            "OPENAI_API_BASE"
         )
         values["openai_proxy"] = get_from_dict_or_env(
             values,
@@ -254,41 +283,54 @@ class BaseOpenAI(BaseLLM):
             "OPENAI_PROXY",
             default="",
         )
-        values["openai_organization"] = get_from_dict_or_env(
-            values,
-            "openai_organization",
-            "OPENAI_ORGANIZATION",
-            default="",
+        values["openai_organization"] = (
+            values["openai_organization"]
+            or os.getenv("OPENAI_ORG_ID")
+            or os.getenv("OPENAI_ORGANIZATION")
         )
         try:
             import openai
-
-            values["client"] = openai.Completion
         except ImportError:
             raise ImportError(
                 "Could not import openai python package. "
                 "Please install it with `pip install openai`."
             )
-        if values["streaming"] and values["n"] > 1:
-            raise ValueError("Cannot stream results when n > 1.")
-        if values["streaming"] and values["best_of"] > 1:
-            raise ValueError("Cannot stream results when best_of > 1.")
+
+        if is_openai_v1():
+            client_params = {
+                "api_key": values["openai_api_key"],
+                "organization": values["openai_organization"],
+                "base_url": values["openai_api_base"],
+                "timeout": values["request_timeout"],
+                "max_retries": values["max_retries"],
+                "default_headers": values["default_headers"],
+                "default_query": values["default_query"],
+                "http_client": values["http_client"],
+            }
+            values["client"] = openai.OpenAI(**client_params).completions
+            values["async_client"] = openai.AsyncOpenAI(**client_params).completions
+        else:
+            values["client"] = openai.Completion
+
         return values
 
     @property
     def _default_params(self) -> Dict[str, Any]:
         """Get the default parameters for calling OpenAI API."""
-        normal_params = {
+        normal_params: Dict[str, Any] = {
             "temperature": self.temperature,
-            "max_tokens": self.max_tokens,
             "top_p": self.top_p,
             "frequency_penalty": self.frequency_penalty,
             "presence_penalty": self.presence_penalty,
             "n": self.n,
-            "request_timeout": self.request_timeout,
             "logit_bias": self.logit_bias,
         }
+        if self.max_tokens is not None:
+            normal_params["max_tokens"] = self.max_tokens
+        if self.request_timeout is not None and not is_openai_v1():
+            normal_params["request_timeout"] = self.request_timeout
+
         # Azure gpt-35-turbo doesn't support best_of
         # don't specify best_of if it is 1
         if self.best_of > 1:
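A rough standalone sketch of what the v1 branch above builds; the placeholder key and parameter values are illustrative, not taken from the patch. Both clients expose a `completions` resource whose `create` method matches the old call shape, which is why `completion_with_retry` can short-circuit to `llm.client.create(**kwargs)` and leave retrying to the SDK's own `max_retries` (hence the lower default of 2) instead of tenacity:

```python
import openai

# Illustrative stand-ins for the values dict used in validate_environment.
client_params = {
    "api_key": "sk-placeholder",  # normally resolved from OPENAI_API_KEY
    "organization": None,
    "base_url": None,  # None lets the SDK fall back to its default endpoint
    "timeout": None,
    "max_retries": 2,  # retries now happen inside the SDK, not via tenacity
    "default_headers": None,
    "default_query": None,
    "http_client": None,
}

# Sync and async clients share the same constructor parameters; only the
# bound `completions` resource is stored on the model.
sync_completions = openai.OpenAI(**client_params).completions
async_completions = openai.AsyncOpenAI(**client_params).completions
```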
@@ -308,6 +350,8 @@ class BaseOpenAI(BaseLLM):
         for stream_resp in completion_with_retry(
             self, prompt=prompt, run_manager=run_manager, **params
         ):
+            if not isinstance(stream_resp, dict):
+                stream_resp = stream_resp.dict()
             chunk = _stream_response_to_generation_chunk(stream_resp)
             yield chunk
             if run_manager:
@@ -332,6 +376,8 @@ class BaseOpenAI(BaseLLM):
         async for stream_resp in await acompletion_with_retry(
             self, prompt=prompt, run_manager=run_manager, **params
         ):
+            if not isinstance(stream_resp, dict):
+                stream_resp = stream_resp.dict()
             chunk = _stream_response_to_generation_chunk(stream_resp)
             yield chunk
             if run_manager:
@@ -374,6 +420,7 @@ class BaseOpenAI(BaseLLM):
         # Get the token usage from the response.
         # Includes prompt, completion, and total tokens used.
         _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        system_fingerprint: Optional[str] = None
         for _prompts in sub_prompts:
             if self.streaming:
                 if len(_prompts) > 1:
@@ -401,9 +448,21 @@ class BaseOpenAI(BaseLLM):
                 response = completion_with_retry(
                     self, prompt=_prompts, run_manager=run_manager, **params
                 )
+                if not isinstance(response, dict):
+                    # The v1 client returns the response as a Pydantic object
+                    # instead of a dict. For the transition period, we deep
+                    # convert it to a dict.
+                    response = response.dict()
+
                 choices.extend(response["choices"])
                 update_token_usage(_keys, response, token_usage)
-        return self.create_llm_result(choices, prompts, token_usage)
+                if not system_fingerprint:
+                    system_fingerprint = response.get("system_fingerprint")
+        return self.create_llm_result(
+            choices,
+            prompts,
+            token_usage,
+            system_fingerprint=system_fingerprint,
+        )
 
     async def _agenerate(
         self,
@@ -421,6 +480,7 @@ class BaseOpenAI(BaseLLM):
         # Get the token usage from the response.
         # Includes prompt, completion, and total tokens used.
         _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        system_fingerprint: Optional[str] = None
         for _prompts in sub_prompts:
             if self.streaming:
                 if len(_prompts) > 1:
@@ -450,9 +510,16 @@ class BaseOpenAI(BaseLLM):
                 response = await acompletion_with_retry(
                     self, prompt=_prompts, run_manager=run_manager, **params
                 )
+                if not isinstance(response, dict):
+                    response = response.dict()
                 choices.extend(response["choices"])
                 update_token_usage(_keys, response, token_usage)
-        return self.create_llm_result(choices, prompts, token_usage)
+        return self.create_llm_result(
+            choices,
+            prompts,
+            token_usage,
+            system_fingerprint=system_fingerprint,
+        )
 
     def get_sub_prompts(
         self,
@@ -478,7 +545,12 @@ class BaseOpenAI(BaseLLM):
         return sub_prompts
 
     def create_llm_result(
-        self, choices: Any, prompts: List[str], token_usage: Dict[str, int]
+        self,
+        choices: Any,
+        prompts: List[str],
+        token_usage: Dict[str, int],
+        *,
+        system_fingerprint: Optional[str] = None,
     ) -> LLMResult:
         """Create the LLMResult from the choices and prompts."""
         generations = []
@@ -497,16 +569,22 @@ class BaseOpenAI(BaseLLM):
             ]
         )
         llm_output = {"token_usage": token_usage, "model_name": self.model_name}
+        if system_fingerprint:
+            llm_output["system_fingerprint"] = system_fingerprint
         return LLMResult(generations=generations, llm_output=llm_output)
 
     @property
     def _invocation_params(self) -> Dict[str, Any]:
         """Get the parameters used to invoke the model."""
-        openai_creds: Dict[str, Any] = {
-            "api_key": self.openai_api_key,
-            "api_base": self.openai_api_base,
-            "organization": self.openai_organization,
-        }
+        openai_creds: Dict[str, Any] = {}
+        if not is_openai_v1():
+            openai_creds.update(
+                {
+                    "api_key": self.openai_api_key,
+                    "api_base": self.openai_api_base,
+                    "organization": self.openai_organization,
+                }
+            )
         if self.openai_proxy:
             import openai
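The net effect of the `system_fingerprint` plumbing above is visible on the returned `LLMResult`. A small usage sketch, assuming `OPENAI_API_KEY` is set; whether a fingerprint appears depends on the model, which is why the code reads it with `.get()` and only copies it into `llm_output` when present:

```python
from langchain.llms import OpenAI

llm = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
result = llm.generate(["Say foo:"])

# token_usage is always populated; system_fingerprint only when the API
# returned one (newer models), so .get() avoids a KeyError.
print(result.llm_output["token_usage"])
print(result.llm_output.get("system_fingerprint"))
```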
_keys = {"completion_tokens", "prompt_tokens", "total_tokens"} + system_fingerprint: Optional[str] = None for _prompts in sub_prompts: if self.streaming: if len(_prompts) > 1: @@ -450,9 +510,16 @@ class BaseOpenAI(BaseLLM): response = await acompletion_with_retry( self, prompt=_prompts, run_manager=run_manager, **params ) + if not isinstance(response, dict): + response = response.dict() choices.extend(response["choices"]) update_token_usage(_keys, response, token_usage) - return self.create_llm_result(choices, prompts, token_usage) + return self.create_llm_result( + choices, + prompts, + token_usage, + system_fingerprint=system_fingerprint, + ) def get_sub_prompts( self, @@ -478,7 +545,12 @@ class BaseOpenAI(BaseLLM): return sub_prompts def create_llm_result( - self, choices: Any, prompts: List[str], token_usage: Dict[str, int] + self, + choices: Any, + prompts: List[str], + token_usage: Dict[str, int], + *, + system_fingerprint: Optional[str] = None, ) -> LLMResult: """Create the LLMResult from the choices and prompts.""" generations = [] @@ -497,16 +569,22 @@ class BaseOpenAI(BaseLLM): ] ) llm_output = {"token_usage": token_usage, "model_name": self.model_name} + if system_fingerprint: + llm_output["system_fingerprint"] = system_fingerprint return LLMResult(generations=generations, llm_output=llm_output) @property def _invocation_params(self) -> Dict[str, Any]: """Get the parameters used to invoke the model.""" - openai_creds: Dict[str, Any] = { - "api_key": self.openai_api_key, - "api_base": self.openai_api_base, - "organization": self.openai_organization, - } + openai_creds: Dict[str, Any] = {} + if not is_openai_v1(): + openai_creds.update( + { + "api_key": self.openai_api_key, + "api_base": self.openai_api_base, + "organization": self.openai_organization, + } + ) if self.openai_proxy: import openai @@ -731,12 +809,21 @@ class OpenAIChat(BaseLLM): """ client: Any #: :meta private: + + # this is for compatibility with Union types in helper functions + async_client: Any #: :meta private: model_name: str = "gpt-3.5-turbo" """Model name to use.""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) """Holds any model parameters valid for `create` call not explicitly specified.""" - openai_api_key: Optional[str] = None - openai_api_base: Optional[str] = None + # When updating this to use a SecretStr + # Check for classes that derive from this class (as some of them + # may assume openai_api_key is a str) + openai_api_key: Optional[str] = Field(default=None, alias="api_key") + """Automatically inferred from env var `OPENAI_API_KEY` if not provided.""" + openai_api_base: Optional[str] = Field(default=None, alias="base_url") + """Base URL path for API requests, leave blank if not using a proxy or service + emulator.""" # to support explicit proxy for OpenAI openai_proxy: Optional[str] = None max_retries: int = 6 @@ -850,6 +937,8 @@ class OpenAIChat(BaseLLM): for stream_resp in completion_with_retry( self, messages=messages, run_manager=run_manager, **params ): + if not isinstance(stream_resp, dict): + stream_resp = stream_resp.dict() token = stream_resp["choices"][0]["delta"].get("content", "") chunk = GenerationChunk(text=token) yield chunk @@ -868,6 +957,8 @@ class OpenAIChat(BaseLLM): async for stream_resp in await acompletion_with_retry( self, messages=messages, run_manager=run_manager, **params ): + if not isinstance(stream_resp, dict): + stream_resp = stream_resp.dict() token = stream_resp["choices"][0]["delta"].get("content", "") chunk = GenerationChunk(text=token) yield chunk 
diff --git a/libs/langchain/langchain/utils/openai.py b/libs/langchain/langchain/utils/openai.py
index 6f0c4cfe40c..d404d82e354 100644
--- a/libs/langchain/langchain/utils/openai.py
+++ b/libs/langchain/langchain/utils/openai.py
@@ -2,9 +2,9 @@ from __future__ import annotations
 
 from importlib.metadata import version
 
-from packaging.version import Version, parse
+from packaging.version import parse
 
 
 def is_openai_v1() -> bool:
     _version = parse(version("openai"))
-    return _version >= Version("1.0.0")
+    return _version.major >= 1
diff --git a/libs/langchain/tests/integration_tests/llms/test_openai.py b/libs/langchain/tests/integration_tests/llms/test_openai.py
index 4ec967667b3..07e24f9a86e 100644
--- a/libs/langchain/tests/integration_tests/llms/test_openai.py
+++ b/libs/langchain/tests/integration_tests/llms/test_openai.py
@@ -238,13 +238,6 @@ async def test_openai_async_streaming_callback() -> None:
     assert isinstance(result, LLMResult)
 
 
-def test_openai_chat_wrong_class() -> None:
-    """Test OpenAIChat with wrong class still works."""
-    llm = OpenAI(model_name="gpt-3.5-turbo")
-    output = llm("Say foo:")
-    assert isinstance(output, str)
-
-
 def test_openai_modelname_to_contextsize_valid() -> None:
     """Test model name to context size on a valid model."""
     assert OpenAI().modelname_to_contextsize("davinci") == 2049
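One closing note on the `is_openai_v1()` change above: `parse(...).major` differs from the old `>= Version("1.0.0")` comparison for pre-releases of 1.0, which the major-version check counts as v1. A quick illustration using `packaging` (version strings are hypothetical):

```python
from packaging.version import Version, parse

# Stable releases behave the same under both checks.
assert parse("1.2.3") >= Version("1.0.0")
assert parse("1.2.3").major >= 1
assert parse("0.28.1").major == 0

# Pre-releases of 1.0 are where the two checks disagree:
assert not parse("1.0.0rc1") >= Version("1.0.0")  # old check: not yet v1
assert parse("1.0.0rc1").major >= 1               # new check: treated as v1
```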