multiple: pydantic 2 compatibility, v0.3 (#26443)

Signed-off-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com>
Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com>
Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: ccurme <chester.curme@gmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Tomaz Bratanic <bratanic.tomaz@gmail.com>
Co-authored-by: ZhangShenao <15201440436@163.com>
Co-authored-by: Friso H. Kingma <fhkingma@gmail.com>
Co-authored-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Nuno Campos <nuno@langchain.dev>
Co-authored-by: Morgante Pell <morgantep@google.com>
2025-09-16 15:04:13 +00:00 · 2024-09-13 14:38:45 -07:00
parent d9813bdbbc
commit c2a3021bb0
1402 changed files with 38318 additions and 30410 deletions
--- a/libs/partners/openai/langchain_openai/embeddings/azure.py
+++ b/libs/partners/openai/langchain_openai/embeddings/azure.py
@@ -2,11 +2,12 @@

 from __future__ import annotations

-from typing import Callable, Dict, Optional, Union
+from typing import Callable, Optional, Union

 import openai
-from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
 from langchain_core.utils import from_env, secret_from_env
+from pydantic import Field, SecretStr, model_validator
+from typing_extensions import Self, cast

 from langchain_openai.embeddings.base import OpenAIEmbeddings

@@ -154,21 +155,21 @@ class AzureOpenAIEmbeddings(OpenAIEmbeddings):
    chunk_size: int = 2048
    """Maximum number of texts to embed in each batch"""

-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_environment(cls, values: Dict) -> Dict:
+    @model_validator(mode="after")
+    def validate_environment(self) -> Self:
        """Validate that api key and python package exists in environment."""
        # For backwards compatibility. Before openai v1, no distinction was made
        # between azure_endpoint and base_url (openai_api_base).
-        openai_api_base = values["openai_api_base"]
-        if openai_api_base and values["validate_base_url"]:
+        openai_api_base = self.openai_api_base
+        if openai_api_base and self.validate_base_url:
            if "/openai" not in openai_api_base:
-                values["openai_api_base"] += "/openai"
+                self.openai_api_base = cast(str, self.openai_api_base) + "/openai"
                raise ValueError(
                    "As of openai>=1.0.0, Azure endpoints should be specified via "
                    "the `azure_endpoint` param not `openai_api_base` "
                    "(or alias `base_url`). "
                )
-            if values["deployment"]:
+            if self.deployment:
                raise ValueError(
                    "As of openai>=1.0.0, if `deployment` (or alias "
                    "`azure_deployment`) is specified then "
@@ -176,39 +177,37 @@ class AzureOpenAIEmbeddings(OpenAIEmbeddings):
                    "Instead use `deployment` (or alias `azure_deployment`) "
                    "and `azure_endpoint`."
                )
-        client_params = {
-            "api_version": values["openai_api_version"],
-            "azure_endpoint": values["azure_endpoint"],
-            "azure_deployment": values["deployment"],
+        client_params: dict = {
+            "api_version": self.openai_api_version,
+            "azure_endpoint": self.azure_endpoint,
+            "azure_deployment": self.deployment,
            "api_key": (
-                values["openai_api_key"].get_secret_value()
-                if values["openai_api_key"]
-                else None
+                self.openai_api_key.get_secret_value() if self.openai_api_key else None
            ),
            "azure_ad_token": (
-                values["azure_ad_token"].get_secret_value()
-                if values["azure_ad_token"]
-                else None
+                self.azure_ad_token.get_secret_value() if self.azure_ad_token else None
            ),
-            "azure_ad_token_provider": values["azure_ad_token_provider"],
-            "organization": values["openai_organization"],
-            "base_url": values["openai_api_base"],
-            "timeout": values["request_timeout"],
-            "max_retries": values["max_retries"],
-            "default_headers": values["default_headers"],
-            "default_query": values["default_query"],
+            "azure_ad_token_provider": self.azure_ad_token_provider,
+            "organization": self.openai_organization,
+            "base_url": self.openai_api_base,
+            "timeout": self.request_timeout,
+            "max_retries": self.max_retries,
+            "default_headers": self.default_headers,
+            "default_query": self.default_query,
        }
-        if not values.get("client"):
-            sync_specific = {"http_client": values["http_client"]}
-            values["client"] = openai.AzureOpenAI(
-                **client_params, **sync_specific
+        if not self.client:
+            sync_specific: dict = {"http_client": self.http_client}
+            self.client = openai.AzureOpenAI(
+                **client_params,  # type: ignore[arg-type]
+                **sync_specific,
            ).embeddings
-        if not values.get("async_client"):
-            async_specific = {"http_client": values["http_async_client"]}
-            values["async_client"] = openai.AsyncAzureOpenAI(
-                **client_params, **async_specific
+        if not self.async_client:
+            async_specific: dict = {"http_client": self.http_async_client}
+            self.async_client = openai.AsyncAzureOpenAI(
+                **client_params,  # type: ignore[arg-type]
+                **async_specific,
            ).embeddings
-        return values
+        return self

    @property
    def _llm_type(self) -> str:
--- a/libs/partners/openai/langchain_openai/embeddings/base.py
+++ b/libs/partners/openai/langchain_openai/embeddings/base.py
@@ -20,8 +20,9 @@ from typing import (
 import openai
 import tiktoken
 from langchain_core.embeddings import Embeddings
-from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
 from langchain_core.utils import from_env, get_pydantic_field_names, secret_from_env
+from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
+from typing_extensions import Self

 logger = logging.getLogger(__name__)

@@ -263,14 +264,13 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
    """Whether to check the token length of inputs and automatically split inputs 
        longer than embedding_ctx_length."""

-    class Config:
-        """Configuration for this pydantic object."""
+    model_config = ConfigDict(
+        extra="forbid", populate_by_name=True, protected_namespaces=()
+    )

-        extra = "forbid"
-        allow_population_by_field_name = True
-
-    @root_validator(pre=True)
-    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    @model_validator(mode="before")
+    @classmethod
+    def build_extra(cls, values: Dict[str, Any]) -> Any:
        """Build extra kwargs from additional params that were passed in."""
        all_required_field_names = get_pydantic_field_names(cls)
        extra = values.get("model_kwargs", {})
@@ -295,41 +295,37 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
        values["model_kwargs"] = extra
        return values

-    @root_validator(pre=False, skip_on_failure=True, allow_reuse=True)
-    def validate_environment(cls, values: Dict) -> Dict:
+    @model_validator(mode="after")
+    def validate_environment(self) -> Self:
        """Validate that api key and python package exists in environment."""
-        if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
+        if self.openai_api_type in ("azure", "azure_ad", "azuread"):
            raise ValueError(
                "If you are using Azure, "
                "please use the `AzureOpenAIEmbeddings` class."
            )
-        client_params = {
+        client_params: dict = {
            "api_key": (
-                values["openai_api_key"].get_secret_value()
-                if values["openai_api_key"]
-                else None
+                self.openai_api_key.get_secret_value() if self.openai_api_key else None
            ),
-            "organization": values["openai_organization"],
-            "base_url": values["openai_api_base"],
-            "timeout": values["request_timeout"],
-            "max_retries": values["max_retries"],
-            "default_headers": values["default_headers"],
-            "default_query": values["default_query"],
+            "organization": self.openai_organization,
+            "base_url": self.openai_api_base,
+            "timeout": self.request_timeout,
+            "max_retries": self.max_retries,
+            "default_headers": self.default_headers,
+            "default_query": self.default_query,
        }

-        if values["openai_proxy"] and (
-            values["http_client"] or values["http_async_client"]
-        ):
-            openai_proxy = values["openai_proxy"]
-            http_client = values["http_client"]
-            http_async_client = values["http_async_client"]
+        if self.openai_proxy and (self.http_client or self.http_async_client):
+            openai_proxy = self.openai_proxy
+            http_client = self.http_client
+            http_async_client = self.http_async_client
            raise ValueError(
                "Cannot specify 'openai_proxy' if one of "
                "'http_client'/'http_async_client' is already specified. Received:\n"
                f"{openai_proxy=}\n{http_client=}\n{http_async_client=}"
            )
-        if not values.get("client"):
-            if values["openai_proxy"] and not values["http_client"]:
+        if not self.client:
+            if self.openai_proxy and not self.http_client:
                try:
                    import httpx
                except ImportError as e:
@@ -337,13 +333,11 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                        "Could not import httpx python package. "
                        "Please install it with `pip install httpx`."
                    ) from e
-                values["http_client"] = httpx.Client(proxy=values["openai_proxy"])
-            sync_specific = {"http_client": values["http_client"]}
-            values["client"] = openai.OpenAI(
-                **client_params, **sync_specific
-            ).embeddings
-        if not values.get("async_client"):
-            if values["openai_proxy"] and not values["http_async_client"]:
+                self.http_client = httpx.Client(proxy=self.openai_proxy)
+            sync_specific = {"http_client": self.http_client}
+            self.client = openai.OpenAI(**client_params, **sync_specific).embeddings  # type: ignore[arg-type]
+        if not self.async_client:
+            if self.openai_proxy and not self.http_async_client:
                try:
                    import httpx
                except ImportError as e:
@@ -351,14 +345,13 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                        "Could not import httpx python package. "
                        "Please install it with `pip install httpx`."
                    ) from e
-                values["http_async_client"] = httpx.AsyncClient(
-                    proxy=values["openai_proxy"]
-                )
-            async_specific = {"http_client": values["http_async_client"]}
-            values["async_client"] = openai.AsyncOpenAI(
-                **client_params, **async_specific
+                self.http_async_client = httpx.AsyncClient(proxy=self.openai_proxy)
+            async_specific = {"http_client": self.http_async_client}
+            self.async_client = openai.AsyncOpenAI(
+                **client_params,
+                **async_specific,  # type: ignore[arg-type]
            ).embeddings
-        return values
+        return self

    @property
    def _invocation_params(self) -> Dict[str, Any]: