style: some cleanup (#33857)

2026-06-09 10:17:00 +00:00 · 2025-11-06 23:50:46 -05:00
parent d40e340479
commit e023201d42
79 changed files with 662 additions and 531 deletions
--- a/libs/partners/openai/langchain_openai/chat_models/azure.py
+++ b/libs/partners/openai/langchain_openai/chat_models/azure.py
@@ -837,10 +837,10 @@ class AzureChatOpenAI(BaseChatOpenAI):
        Args:
            schema: The output schema. Can be passed in as:

-                - a JSON Schema,
-                - a `TypedDict` class,
-                - a Pydantic class,
-                - or an OpenAI function/tool schema.
+                - A JSON Schema,
+                - A `TypedDict` class,
+                - A Pydantic class,
+                - Or an OpenAI function/tool schema.

                If `schema` is a Pydantic class then the model output will be a
                Pydantic instance of that class, and the model-generated fields will be
@@ -869,11 +869,15 @@ class AzureChatOpenAI(BaseChatOpenAI):
                support which methods [here](https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format).

            include_raw:
-                If `False` then only the parsed structured output is returned. If
-                an error occurs during model output parsing it will be raised. If `True`
-                then both the raw model response (a `BaseMessage`) and the parsed model
-                response will be returned. If an error occurs during output parsing it
-                will be caught and returned as well.
+                If `False` then only the parsed structured output is returned.
+
+                In that case, an error during model output parsing will be raised.
+
+                If `True` then both the raw model response (a `BaseMessage`) and the
+                parsed model response will be returned.
+
+                In that case, an error during output parsing will be caught and
+                returned as well.

                The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
                `'parsing_error'`.
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -463,16 +463,23 @@ _DictOrPydantic: TypeAlias = dict | _BM
 class BaseChatOpenAI(BaseChatModel):
    """Base wrapper around OpenAI large language models for chat."""

-    client: Any = Field(default=None, exclude=True)  #: :meta private:
-    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
-    root_client: Any = Field(default=None, exclude=True)  #: :meta private:
-    root_async_client: Any = Field(default=None, exclude=True)  #: :meta private:
+    client: Any = Field(default=None, exclude=True)
+
+    async_client: Any = Field(default=None, exclude=True)
+
+    root_client: Any = Field(default=None, exclude=True)
+
+    root_async_client: Any = Field(default=None, exclude=True)
+
    model_name: str = Field(default="gpt-3.5-turbo", alias="model")
    """Model name to use."""
+
    temperature: float | None = None
    """What sampling temperature to use."""
+
    model_kwargs: dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
+
    openai_api_key: (
        SecretStr | None | Callable[[], str] | Callable[[], Awaitable[str]]
    ) = Field(
@@ -524,19 +531,24 @@ class BaseChatOpenAI(BaseChatModel):
        model = ChatOpenAI(model="gpt-5-nano", api_key=get_api_key)
        ```
    """
+
    openai_api_base: str | None = Field(default=None, alias="base_url")
    """Base URL path for API requests, leave blank if not using a proxy or service emulator."""  # noqa: E501
+
    openai_organization: str | None = Field(default=None, alias="organization")
    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
+
    # to support explicit proxy for OpenAI
    openai_proxy: str | None = Field(
        default_factory=from_env("OPENAI_PROXY", default=None)
    )
+
    request_timeout: float | tuple[float, float] | Any | None = Field(
        default=None, alias="timeout"
    )
    """Timeout for requests to OpenAI completion API. Can be float, `httpx.Timeout` or
    `None`."""
+
    stream_usage: bool | None = None
    """Whether to include usage metadata in streaming output. If enabled, an additional
    message chunk will be generated during the stream including usage metadata.
@@ -550,30 +562,42 @@ class BaseChatOpenAI(BaseChatModel):
    !!! warning "Behavior changed in `langchain-openai` 0.3.35"
        Enabled for default base URL and client.
    """
+
    max_retries: int | None = None
    """Maximum number of retries to make when generating."""
+
    presence_penalty: float | None = None
    """Penalizes repeated tokens."""
+
    frequency_penalty: float | None = None
    """Penalizes repeated tokens according to frequency."""
+
    seed: int | None = None
    """Seed for generation"""
+
    logprobs: bool | None = None
    """Whether to return logprobs."""
+
    top_logprobs: int | None = None
    """Number of most likely tokens to return at each token position, each with an
    associated log probability. `logprobs` must be set to true if this parameter is
    used."""
+
    logit_bias: dict[int, int] | None = None
    """Modify the likelihood of specified tokens appearing in the completion."""
+
    streaming: bool = False
    """Whether to stream the results or not."""
+
    n: int | None = None
    """Number of chat completions to generate for each prompt."""
+
    top_p: float | None = None
    """Total probability mass of tokens to consider at each step."""
+
    max_tokens: int | None = Field(default=None)
    """Maximum number of tokens to generate."""
+
    reasoning_effort: str | None = None
    """Constrains effort on reasoning for reasoning models. For use with the Chat
    Completions API.
@@ -584,6 +608,7 @@ class BaseChatOpenAI(BaseChatModel):
    `'high'`. Reducing reasoning effort can result in faster responses and fewer
    tokens used on reasoning in a response.
    """
+
    reasoning: dict[str, Any] | None = None
    """Reasoning parameters for reasoning models. For use with the Responses API.

@@ -596,6 +621,7 @@ class BaseChatOpenAI(BaseChatModel):

    !!! version-added "Added in `langchain-openai` 0.3.24"
    """
+
    verbosity: str | None = None
    """Controls the verbosity level of responses for reasoning models. For use with the
    Responses API.
@@ -604,6 +630,7 @@ class BaseChatOpenAI(BaseChatModel):

    !!! version-added "Added in `langchain-openai` 0.3.28"
    """
+
    tiktoken_model_name: str | None = None
    """The model name to pass to tiktoken when using this class.
    Tiktoken is used to count the number of tokens in documents to constrain
@@ -614,19 +641,25 @@ class BaseChatOpenAI(BaseChatModel):
    when using one of the many model providers that expose an OpenAI-like
    API but with different models. In those cases, in order to avoid erroring
    when tiktoken is called, you can specify a model name to use here."""
+
    default_headers: Mapping[str, str] | None = None
+
    default_query: Mapping[str, object] | None = None
+
    # Configure a custom httpx client. See the
    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
    http_client: Any | None = Field(default=None, exclude=True)
    """Optional `httpx.Client`. Only used for sync invocations. Must specify
    `http_async_client` as well if you'd like a custom client for async invocations.
    """
+
    http_async_client: Any | None = Field(default=None, exclude=True)
    """Optional `httpx.AsyncClient`. Only used for async invocations. Must specify
    `http_client` as well if you'd like a custom client for sync invocations."""
+
    stop: list[str] | str | None = Field(default=None, alias="stop_sequences")
    """Default stop sequences."""
+
    extra_body: Mapping[str, Any] | None = None
    """Optional additional JSON properties to include in the request parameters when
    making requests to OpenAI compatible APIs, such as vLLM, LM Studio, or other
@@ -649,6 +682,7 @@ class BaseChatOpenAI(BaseChatModel):

    include_response_headers: bool = False
    """Whether to include response headers in the output message `response_metadata`."""
+
    disabled_params: dict[str, Any] | None = Field(default=None)
    """Parameters of the OpenAI client or `chat.completions` endpoint that should be
    disabled for the given model.
@@ -1823,10 +1857,10 @@ class BaseChatOpenAI(BaseChatModel):
        Args:
            schema: The output schema. Can be passed in as:

-                - an OpenAI function/tool schema,
-                - a JSON Schema,
-                - a `TypedDict` class,
-                - or a Pydantic class.
+                - An OpenAI function/tool schema,
+                - A JSON Schema,
+                - A `TypedDict` class,
+                - Or a Pydantic class.

                If `schema` is a Pydantic class then the model output will be a
                Pydantic instance of that class, and the model-generated fields will be
@@ -1850,11 +1884,15 @@ class BaseChatOpenAI(BaseChatModel):
                    formatting the output into the desired schema into the model call

            include_raw:
-                If `False` then only the parsed structured output is returned. If
-                an error occurs during model output parsing it will be raised. If `True`
-                then both the raw model response (a `BaseMessage`) and the parsed model
-                response will be returned. If an error occurs during output parsing it
-                will be caught and returned as well.
+                If `False` then only the parsed structured output is returned.
+
+                In that case, an error during model output parsing will be raised.
+
+                If `True` then both the raw model response (a `BaseMessage`) and the
+                parsed model response will be returned.
+
+                In that case, an error during output parsing will be caught and
+                returned as well.

                The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
                `'parsing_error'`.
@@ -2957,11 +2995,15 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
                Learn more about the [differences between methods](https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format).

            include_raw:
-                If `False` then only the parsed structured output is returned. If
-                an error occurs during model output parsing it will be raised. If `True`
-                then both the raw model response (a `BaseMessage`) and the parsed model
-                response will be returned. If an error occurs during output parsing it
-                will be caught and returned as well.
+                If `False` then only the parsed structured output is returned.
+
+                In that case, an error during model output parsing will be raised.
+
+                If `True` then both the raw model response (a `BaseMessage`) and the
+                parsed model response will be returned.
+
+                In that case, an error during output parsing will be caught and
+                returned as well.

                The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
                `'parsing_error'`.
--- a/libs/partners/openai/langchain_openai/embeddings/base.py
+++ b/libs/partners/openai/langchain_openai/embeddings/base.py
@@ -159,44 +159,55 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
        ```
    """

-    client: Any = Field(default=None, exclude=True)  #: :meta private:
-    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
+    client: Any = Field(default=None, exclude=True)
+
+    async_client: Any = Field(default=None, exclude=True)
+
    model: str = "text-embedding-ada-002"
+
    dimensions: int | None = None
    """The number of dimensions the resulting output embeddings should have.

    Only supported in `text-embedding-3` and later models.
    """
+
    # to support Azure OpenAI Service custom deployment names
    deployment: str | None = model
+
    # TODO: Move to AzureOpenAIEmbeddings.
    openai_api_version: str | None = Field(
        default_factory=from_env("OPENAI_API_VERSION", default=None),
        alias="api_version",
    )
    """Automatically inferred from env var `OPENAI_API_VERSION` if not provided."""
+
    # to support Azure OpenAI Service custom endpoints
    openai_api_base: str | None = Field(
        alias="base_url", default_factory=from_env("OPENAI_API_BASE", default=None)
    )
    """Base URL path for API requests, leave blank if not using a proxy or service
        emulator."""
+
    # to support Azure OpenAI Service custom endpoints
    openai_api_type: str | None = Field(
        default_factory=from_env("OPENAI_API_TYPE", default=None)
    )
+
    # to support explicit proxy for OpenAI
    openai_proxy: str | None = Field(
        default_factory=from_env("OPENAI_PROXY", default=None)
    )
+
    embedding_ctx_length: int = 8191
    """The maximum number of tokens to embed at once."""
+
    openai_api_key: (
        SecretStr | None | Callable[[], str] | Callable[[], Awaitable[str]]
    ) = Field(
        alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None)
    )
    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
+
    openai_organization: str | None = Field(
        alias="organization",
        default_factory=from_env(
@@ -204,21 +215,29 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
        ),
    )
    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
+
    allowed_special: Literal["all"] | set[str] | None = None
+
    disallowed_special: Literal["all"] | set[str] | Sequence[str] | None = None
+
    chunk_size: int = 1000
    """Maximum number of texts to embed in each batch"""
+
    max_retries: int = 2
    """Maximum number of retries to make when generating."""
+
    request_timeout: float | tuple[float, float] | Any | None = Field(
        default=None, alias="timeout"
    )
    """Timeout for requests to OpenAI completion API. Can be float, `httpx.Timeout` or
    None."""
+
    headers: Any = None
+
    tiktoken_enabled: bool = True
    """Set this to False for non-OpenAI implementations of the embeddings API, e.g.
    the `--extensions openai` extension for `text-generation-webui`"""
+
    tiktoken_model_name: str | None = None
    """The model name to pass to tiktoken when using this class.
    Tiktoken is used to count the number of tokens in documents to constrain
@@ -229,28 +248,39 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
    when using one of the many model providers that expose an OpenAI-like
    API but with different models. In those cases, in order to avoid erroring
    when tiktoken is called, you can specify a model name to use here."""
+
    show_progress_bar: bool = False
    """Whether to show a progress bar when embedding."""
+
    model_kwargs: dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
+
    skip_empty: bool = False
    """Whether to skip empty strings when embedding or raise an error."""
+
    default_headers: Mapping[str, str] | None = None
+
    default_query: Mapping[str, object] | None = None
+
    # Configure a custom httpx client. See the
    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+
    retry_min_seconds: int = 4
    """Min number of seconds to wait between retries"""
+
    retry_max_seconds: int = 20
    """Max number of seconds to wait between retries"""
+
    http_client: Any | None = None
    """Optional `httpx.Client`. Only used for sync invocations. Must specify
        `http_async_client` as well if you'd like a custom client for async
        invocations.
    """
+
    http_async_client: Any | None = None
    """Optional `httpx.AsyncClient`. Only used for async invocations. Must specify
        `http_client` as well if you'd like a custom client for sync invocations."""
+
    check_embedding_ctx_length: bool = True
    """Whether to check the token length of inputs and automatically split inputs
        longer than embedding_ctx_length."""
--- a/libs/partners/openai/langchain_openai/llms/base.py
+++ b/libs/partners/openai/langchain_openai/llms/base.py
@@ -164,37 +164,50 @@ class BaseOpenAI(BaseLLM):

    """

-    client: Any = Field(default=None, exclude=True)  #: :meta private:
-    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
+    client: Any = Field(default=None, exclude=True)
+
+    async_client: Any = Field(default=None, exclude=True)
+
    model_name: str = Field(default="gpt-3.5-turbo-instruct", alias="model")
    """Model name to use."""
+
    temperature: float = 0.7
    """What sampling temperature to use."""
+
    max_tokens: int = 256
    """The maximum number of tokens to generate in the completion.
    -1 returns as many tokens as possible given the prompt and
    the models maximal context size."""
+
    top_p: float = 1
    """Total probability mass of tokens to consider at each step."""
+
    frequency_penalty: float = 0
    """Penalizes repeated tokens according to frequency."""
+
    presence_penalty: float = 0
    """Penalizes repeated tokens."""
+
    n: int = 1
    """How many completions to generate for each prompt."""
+
    best_of: int = 1
    """Generates best_of completions server-side and returns the "best"."""
+
    model_kwargs: dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
+
    openai_api_key: SecretStr | None | Callable[[], str] = Field(
        alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None)
    )
    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
+
    openai_api_base: str | None = Field(
        alias="base_url", default_factory=from_env("OPENAI_API_BASE", default=None)
    )
    """Base URL path for API requests, leave blank if not using a proxy or service
        emulator."""
+
    openai_organization: str | None = Field(
        alias="organization",
        default_factory=from_env(
@@ -202,32 +215,43 @@ class BaseOpenAI(BaseLLM):
        ),
    )
    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
+
    # to support explicit proxy for OpenAI
    openai_proxy: str | None = Field(
        default_factory=from_env("OPENAI_PROXY", default=None)
    )
+
    batch_size: int = 20
    """Batch size to use when passing multiple documents to generate."""
+
    request_timeout: float | tuple[float, float] | Any | None = Field(
        default=None, alias="timeout"
    )
    """Timeout for requests to OpenAI completion API. Can be float, `httpx.Timeout` or
    None."""
+
    logit_bias: dict[str, float] | None = None
    """Adjust the probability of specific tokens being generated."""
+
    max_retries: int = 2
    """Maximum number of retries to make when generating."""
+
    seed: int | None = None
    """Seed for generation"""
+
    logprobs: int | None = None
    """Include the log probabilities on the logprobs most likely output tokens,
    as well the chosen tokens."""
+
    streaming: bool = False
    """Whether to stream the results or not."""
+
    allowed_special: Literal["all"] | set[str] = set()
    """Set of special tokens that are allowed。"""
+
    disallowed_special: Literal["all"] | Collection[str] = "all"
    """Set of special tokens that are not allowed。"""
+
    tiktoken_model_name: str | None = None
    """The model name to pass to tiktoken when using this class.
    Tiktoken is used to count the number of tokens in documents to constrain
@@ -238,8 +262,11 @@ class BaseOpenAI(BaseLLM):
    when using one of the many model providers that expose an OpenAI-like
    API but with different models. In those cases, in order to avoid erroring
    when tiktoken is called, you can specify a model name to use here."""
+
    default_headers: Mapping[str, str] | None = None
+
    default_query: Mapping[str, object] | None = None
+
    # Configure a custom httpx client. See the
    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
    http_client: Any | None = None
@@ -247,9 +274,11 @@ class BaseOpenAI(BaseLLM):
        `http_async_client` as well if you'd like a custom client for async
        invocations.
    """
+
    http_async_client: Any | None = None
    """Optional `httpx.AsyncClient`. Only used for async invocations. Must specify
        `http_client` as well if you'd like a custom client for sync invocations."""
+
    extra_body: Mapping[str, Any] | None = None
    """Optional additional JSON properties to include in the request parameters when
    making requests to OpenAI compatible APIs, such as vLLM."""
--- a/libs/partners/openai/pyproject.toml
+++ b/libs/partners/openai/pyproject.toml
@@ -3,18 +3,19 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"

 [project]
-authors = []
+name = "langchain-openai"
+description = "An integration package connecting OpenAI and LangChain"
 license = { text = "MIT" }
+readme = "README.md"
+authors = []
+
+version = "1.0.2"
 requires-python = ">=3.10.0,<4.0.0"
 dependencies = [
    "langchain-core>=1.0.2,<2.0.0",
    "openai>=1.109.1,<3.0.0",
    "tiktoken>=0.7.0,<1.0.0",
 ]
-name = "langchain-openai"
-version = "1.0.2"
-description = "An integration package connecting OpenAI and LangChain"
-readme = "README.md"

 [project.urls]
 Homepage = "https://docs.langchain.com/oss/python/integrations/providers/openai"