Fix langchain.llms OpenAI completion doesn't work due to v1 client update (#13099)

This commit fixes the issue that langchain.llms OpenAI completion stopped working since the V1 openai client update. Replace this entire comment with: - **Description:** This PR fixes the issue [AttributeError: module 'openai' has no attribute 'Completion'](https://github.com/langchain-ai/langchain/issues/12967) similar to 8e0cb2eb84 and https://github.com/langchain-ai/langchain/pull/12969, - **Issue:** https://github.com/langchain-ai/langchain/issues/12967, - **Dependencies:** `openai` v1.x.x client, - **Tag maintainer:** @baskaryan, - **Twitter handle:** @dosuken123 Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/extras` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --------- Co-authored-by: Erick Friis <erick@langchain.dev> Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-08-29 22:46:27 +00:00 · 2023-11-10 08:12:19 +09:00 · 2023-11-10 08:12:19 +09:00 · 28cc60b347
commit 28cc60b347
parent 555ce600ef
4 changed files with 137 additions and 47 deletions
--- a/libs/langchain/langchain/chat_models/openai.py
+++ b/libs/langchain/langchain/chat_models/openai.py
@ -327,6 +327,8 @@ class ChatOpenAI(BaseChatModel):
        }
        if self.max_tokens is not None:
            params["max_tokens"] = self.max_tokens
+        if self.request_timeout is not None and not is_openai_v1():
+            params["request_timeout"] = self.request_timeout
        return params

    def completion_with_retry(
--- a/libs/langchain/langchain/llms/openai.py
+++ b/libs/langchain/langchain/llms/openai.py
@ -1,6 +1,7 @@
 from __future__ import annotations

 import logging
+import os
 import sys
 import warnings
 from typing import (
@ -29,6 +30,7 @@ from langchain.pydantic_v1 import Field, root_validator
 from langchain.schema import Generation, LLMResult
 from langchain.schema.output import GenerationChunk
 from langchain.utils import get_from_dict_or_env, get_pydantic_field_names
+from langchain.utils.openai import is_openai_v1
 from langchain.utils.utils import build_extra_kwargs

 logger = logging.getLogger(__name__)
@ -106,6 +108,9 @@ def completion_with_retry(
    **kwargs: Any,
 ) -> Any:
    """Use tenacity to retry the completion call."""
+    if is_openai_v1():
+        return llm.client.create(**kwargs)
+
    retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)

    @retry_decorator
@ -121,6 +126,9 @@ async def acompletion_with_retry(
    **kwargs: Any,
 ) -> Any:
    """Use tenacity to retry the async completion call."""
+    if is_openai_v1():
+        return await llm.async_client.create(**kwargs)
+
    retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)

    @retry_decorator
@ -141,13 +149,13 @@ class BaseOpenAI(BaseLLM):
    @property
    def lc_attributes(self) -> Dict[str, Any]:
        attributes: Dict[str, Any] = {}
-        if self.openai_api_base != "":
+        if self.openai_api_base:
            attributes["openai_api_base"] = self.openai_api_base

-        if self.openai_organization != "":
+        if self.openai_organization:
            attributes["openai_organization"] = self.openai_organization

-        if self.openai_proxy != "":
+        if self.openai_proxy:
            attributes["openai_proxy"] = self.openai_proxy

        return attributes
@ -157,6 +165,7 @@ class BaseOpenAI(BaseLLM):
        return True

    client: Any = None  #: :meta private:
+    async_client: Any = None  #: :meta private:
    model_name: str = Field(default="text-davinci-003", alias="model")
    """Model name to use."""
    temperature: float = 0.7
@ -177,18 +186,28 @@ class BaseOpenAI(BaseLLM):
    """Generates best_of completions server-side and returns the "best"."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
-    openai_api_key: Optional[str] = None
-    openai_api_base: Optional[str] = None
-    openai_organization: Optional[str] = None
+    # When updating this to use a SecretStr
+    # Check for classes that derive from this class (as some of them
+    # may assume openai_api_key is a str)
+    openai_api_key: Optional[str] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
+    openai_api_base: Optional[str] = Field(default=None, alias="base_url")
+    """Base URL path for API requests, leave blank if not using a proxy or service 
+        emulator."""
+    openai_organization: Optional[str] = Field(default=None, alias="organization")
+    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
    # to support explicit proxy for OpenAI
    openai_proxy: Optional[str] = None
    batch_size: int = 20
    """Batch size to use when passing multiple documents to generate."""
-    request_timeout: Optional[Union[float, Tuple[float, float]]] = None
-    """Timeout for requests to OpenAI completion API. Default is 600 seconds."""
+    request_timeout: Union[float, Tuple[float, float], Any, None] = Field(
+        default=None, alias="timeout"
+    )
+    """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or 
+        None."""
    logit_bias: Optional[Dict[str, float]] = Field(default_factory=dict)
    """Adjust the probability of specific tokens being generated."""
-    max_retries: int = 6
+    max_retries: int = 2
    """Maximum number of retries to make when generating."""
    streaming: bool = False
    """Whether to stream the results or not."""
@ -206,6 +225,12 @@ class BaseOpenAI(BaseLLM):
    when using one of the many model providers that expose an OpenAI-like 
    API but with different models. In those cases, in order to avoid erroring 
    when tiktoken is called, you can specify a model name to use here."""
+    default_headers: Union[Mapping[str, str], None] = None
+    default_query: Union[Mapping[str, object], None] = None
+    # Configure a custom httpx client. See the
+    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+    http_client: Union[Any, None] = None
+    """Optional httpx.Client."""

    def __new__(cls, **data: Any) -> Union[OpenAIChat, BaseOpenAI]:  # type: ignore
        """Initialize the OpenAI object."""
@ -239,14 +264,18 @@ class BaseOpenAI(BaseLLM):
    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""
+        if values["n"] < 1:
+            raise ValueError("n must be at least 1.")
+        if values["streaming"] and values["n"] > 1:
+            raise ValueError("Cannot stream results when n > 1.")
+        if values["streaming"] and values["best_of"] > 1:
+            raise ValueError("Cannot stream results when best_of > 1.")
+
        values["openai_api_key"] = get_from_dict_or_env(
            values, "openai_api_key", "OPENAI_API_KEY"
        )
-        values["openai_api_base"] = get_from_dict_or_env(
-            values,
-            "openai_api_base",
-            "OPENAI_API_BASE",
-            default="",
+        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
+            "OPENAI_API_BASE"
        )
        values["openai_proxy"] = get_from_dict_or_env(
            values,
@ -254,41 +283,54 @@ class BaseOpenAI(BaseLLM):
            "OPENAI_PROXY",
            default="",
        )
-        values["openai_organization"] = get_from_dict_or_env(
-            values,
-            "openai_organization",
-            "OPENAI_ORGANIZATION",
-            default="",
+        values["openai_organization"] = (
+            values["openai_organization"]
+            or os.getenv("OPENAI_ORG_ID")
+            or os.getenv("OPENAI_ORGANIZATION")
        )
        try:
            import openai
-
-            values["client"] = openai.Completion
        except ImportError:
            raise ImportError(
                "Could not import openai python package. "
                "Please install it with `pip install openai`."
            )
-        if values["streaming"] and values["n"] > 1:
-            raise ValueError("Cannot stream results when n > 1.")
-        if values["streaming"] and values["best_of"] > 1:
-            raise ValueError("Cannot stream results when best_of > 1.")
+
+        if is_openai_v1():
+            client_params = {
+                "api_key": values["openai_api_key"],
+                "organization": values["openai_organization"],
+                "base_url": values["openai_api_base"],
+                "timeout": values["request_timeout"],
+                "max_retries": values["max_retries"],
+                "default_headers": values["default_headers"],
+                "default_query": values["default_query"],
+                "http_client": values["http_client"],
+            }
+            values["client"] = openai.OpenAI(**client_params).completions
+            values["async_client"] = openai.AsyncOpenAI(**client_params).completions
+        else:
+            values["client"] = openai.Completion
+
        return values

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling OpenAI API."""
-        normal_params = {
+        normal_params: Dict[str, Any] = {
            "temperature": self.temperature,
-            "max_tokens": self.max_tokens,
            "top_p": self.top_p,
            "frequency_penalty": self.frequency_penalty,
            "presence_penalty": self.presence_penalty,
            "n": self.n,
-            "request_timeout": self.request_timeout,
            "logit_bias": self.logit_bias,
        }

+        if self.max_tokens is not None:
+            normal_params["max_tokens"] = self.max_tokens
+        if self.request_timeout is not None and not is_openai_v1():
+            normal_params["request_timeout"] = self.request_timeout
+
        # Azure gpt-35-turbo doesn't support best_of
        # don't specify best_of if it is 1
        if self.best_of > 1:
@ -308,6 +350,8 @@ class BaseOpenAI(BaseLLM):
        for stream_resp in completion_with_retry(
            self, prompt=prompt, run_manager=run_manager, **params
        ):
+            if not isinstance(stream_resp, dict):
+                stream_resp = stream_resp.dict()
            chunk = _stream_response_to_generation_chunk(stream_resp)
            yield chunk
            if run_manager:
@ -332,6 +376,8 @@ class BaseOpenAI(BaseLLM):
        async for stream_resp in await acompletion_with_retry(
            self, prompt=prompt, run_manager=run_manager, **params
        ):
+            if not isinstance(stream_resp, dict):
+                stream_resp = stream_resp.dict()
            chunk = _stream_response_to_generation_chunk(stream_resp)
            yield chunk
            if run_manager:
@ -374,6 +420,7 @@ class BaseOpenAI(BaseLLM):
        # Get the token usage from the response.
        # Includes prompt, completion, and total tokens used.
        _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        system_fingerprint: Optional[str] = None
        for _prompts in sub_prompts:
            if self.streaming:
                if len(_prompts) > 1:
@ -401,9 +448,21 @@ class BaseOpenAI(BaseLLM):
                response = completion_with_retry(
                    self, prompt=_prompts, run_manager=run_manager, **params
                )
+                if not isinstance(response, dict):
+                    # V1 client returns the response in an PyDantic object instead of
+                    # dict. For the transition period, we deep convert it to dict.
+                    response = response.dict()
+
                choices.extend(response["choices"])
                update_token_usage(_keys, response, token_usage)
-        return self.create_llm_result(choices, prompts, token_usage)
+                if not system_fingerprint:
+                    system_fingerprint = response.get("system_fingerprint")
+        return self.create_llm_result(
+            choices,
+            prompts,
+            token_usage,
+            system_fingerprint=system_fingerprint,
+        )

    async def _agenerate(
        self,
@ -421,6 +480,7 @@ class BaseOpenAI(BaseLLM):
        # Get the token usage from the response.
        # Includes prompt, completion, and total tokens used.
        _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        system_fingerprint: Optional[str] = None
        for _prompts in sub_prompts:
            if self.streaming:
                if len(_prompts) > 1:
@ -450,9 +510,16 @@ class BaseOpenAI(BaseLLM):
                response = await acompletion_with_retry(
                    self, prompt=_prompts, run_manager=run_manager, **params
                )
+                if not isinstance(response, dict):
+                    response = response.dict()
                choices.extend(response["choices"])
                update_token_usage(_keys, response, token_usage)
-        return self.create_llm_result(choices, prompts, token_usage)
+        return self.create_llm_result(
+            choices,
+            prompts,
+            token_usage,
+            system_fingerprint=system_fingerprint,
+        )

    def get_sub_prompts(
        self,
@ -478,7 +545,12 @@ class BaseOpenAI(BaseLLM):
        return sub_prompts

    def create_llm_result(
-        self, choices: Any, prompts: List[str], token_usage: Dict[str, int]
+        self,
+        choices: Any,
+        prompts: List[str],
+        token_usage: Dict[str, int],
+        *,
+        system_fingerprint: Optional[str] = None,
    ) -> LLMResult:
        """Create the LLMResult from the choices and prompts."""
        generations = []
@ -497,16 +569,22 @@ class BaseOpenAI(BaseLLM):
                ]
            )
        llm_output = {"token_usage": token_usage, "model_name": self.model_name}
+        if system_fingerprint:
+            llm_output["system_fingerprint"] = system_fingerprint
        return LLMResult(generations=generations, llm_output=llm_output)

    @property
    def _invocation_params(self) -> Dict[str, Any]:
        """Get the parameters used to invoke the model."""
-        openai_creds: Dict[str, Any] = {
-            "api_key": self.openai_api_key,
-            "api_base": self.openai_api_base,
-            "organization": self.openai_organization,
-        }
+        openai_creds: Dict[str, Any] = {}
+        if not is_openai_v1():
+            openai_creds.update(
+                {
+                    "api_key": self.openai_api_key,
+                    "api_base": self.openai_api_base,
+                    "organization": self.openai_organization,
+                }
+            )
        if self.openai_proxy:
            import openai

@ -731,12 +809,21 @@ class OpenAIChat(BaseLLM):
    """

    client: Any  #: :meta private:
+
+    # this is for compatibility with Union types in helper functions
+    async_client: Any  #: :meta private:
    model_name: str = "gpt-3.5-turbo"
    """Model name to use."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
-    openai_api_key: Optional[str] = None
-    openai_api_base: Optional[str] = None
+    # When updating this to use a SecretStr
+    # Check for classes that derive from this class (as some of them
+    # may assume openai_api_key is a str)
+    openai_api_key: Optional[str] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
+    openai_api_base: Optional[str] = Field(default=None, alias="base_url")
+    """Base URL path for API requests, leave blank if not using a proxy or service 
+        emulator."""
    # to support explicit proxy for OpenAI
    openai_proxy: Optional[str] = None
    max_retries: int = 6
@ -850,6 +937,8 @@ class OpenAIChat(BaseLLM):
        for stream_resp in completion_with_retry(
            self, messages=messages, run_manager=run_manager, **params
        ):
+            if not isinstance(stream_resp, dict):
+                stream_resp = stream_resp.dict()
            token = stream_resp["choices"][0]["delta"].get("content", "")
            chunk = GenerationChunk(text=token)
            yield chunk
@ -868,6 +957,8 @@ class OpenAIChat(BaseLLM):
        async for stream_resp in await acompletion_with_retry(
            self, messages=messages, run_manager=run_manager, **params
        ):
+            if not isinstance(stream_resp, dict):
+                stream_resp = stream_resp.dict()
            token = stream_resp["choices"][0]["delta"].get("content", "")
            chunk = GenerationChunk(text=token)
            yield chunk
@ -896,6 +987,8 @@ class OpenAIChat(BaseLLM):
        full_response = completion_with_retry(
            self, messages=messages, run_manager=run_manager, **params
        )
+        if not isinstance(full_response, dict):
+            full_response = full_response.dict()
        llm_output = {
            "token_usage": full_response["usage"],
            "model_name": self.model_name,
@ -929,6 +1022,8 @@ class OpenAIChat(BaseLLM):
        full_response = await acompletion_with_retry(
            self, messages=messages, run_manager=run_manager, **params
        )
+        if not isinstance(full_response, dict):
+            full_response = full_response.dict()
        llm_output = {
            "token_usage": full_response["usage"],
            "model_name": self.model_name,
--- a/libs/langchain/langchain/utils/openai.py
+++ b/libs/langchain/langchain/utils/openai.py
@ -2,9 +2,9 @@ from __future__ import annotations

 from importlib.metadata import version

-from packaging.version import Version, parse
+from packaging.version import parse


 def is_openai_v1() -> bool:
    _version = parse(version("openai"))
-    return _version >= Version("1.0.0")
+    return _version.major >= 1
--- a/libs/langchain/tests/integration_tests/llms/test_openai.py
+++ b/libs/langchain/tests/integration_tests/llms/test_openai.py
@ -238,13 +238,6 @@ async def test_openai_async_streaming_callback() -> None:
    assert isinstance(result, LLMResult)


-def test_openai_chat_wrong_class() -> None:
-    """Test OpenAIChat with wrong class still works."""
-    llm = OpenAI(model_name="gpt-3.5-turbo")
-    output = llm("Say foo:")
-    assert isinstance(output, str)
-
-
 def test_openai_modelname_to_contextsize_valid() -> None:
    """Test model name to context size on a valid model."""
    assert OpenAI().modelname_to_contextsize("davinci") == 2049