huggingface[major]: upgrade pydantic (#26048)

2025-09-28 06:48:50 +00:00 · 2024-09-04 15:08:43 -04:00
parent 63ba12d8e0
commit ed771f2d2b
10 changed files with 90 additions and 221 deletions
--- a/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py
+++ b/libs/partners/huggingface/langchain_huggingface/chat_models/huggingface.py
@@ -29,10 +29,11 @@ from langchain_core.messages import (
    ToolMessage,
 )
 from langchain_core.outputs import ChatGeneration, ChatResult, LLMResult
-from langchain_core.pydantic_v1 import root_validator
 from langchain_core.runnables import Runnable
 from langchain_core.tools import BaseTool
 from langchain_core.utils.function_calling import convert_to_openai_tool
+from pydantic import model_validator
+from typing_extensions import Self

 from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint
 from langchain_huggingface.llms.huggingface_pipeline import HuggingFacePipeline
@@ -265,7 +266,7 @@ class ChatHuggingFace(BaseChatModel):
    Tool calling:
        .. code-block:: python

-            from langchain_core.pydantic_v1 import BaseModel, Field
+            from pydantic import BaseModel, Field

            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''
@@ -325,20 +326,20 @@ class ChatHuggingFace(BaseChatModel):
            else self.tokenizer
        )

-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_llm(cls, values: dict) -> dict:
+    @model_validator(mode="after")
+    def validate_llm(self) -> Self:
        if (
-            not _is_huggingface_hub(values["llm"])
-            and not _is_huggingface_textgen_inference(values["llm"])
-            and not _is_huggingface_endpoint(values["llm"])
-            and not _is_huggingface_pipeline(values["llm"])
+            not _is_huggingface_hub(self.llm)
+            and not _is_huggingface_textgen_inference(self.llm)
+            and not _is_huggingface_endpoint(self.llm)
+            and not _is_huggingface_pipeline(self.llm)
        ):
            raise TypeError(
                "Expected llm to be one of HuggingFaceTextGenInference, "
                "HuggingFaceEndpoint, HuggingFaceHub, HuggingFacePipeline "
-                f"received {type(values['llm'])}"
+                f"received {type(self.llm)}"
            )
-        return values
+        return self

    def _create_chat_result(self, response: TGI_RESPONSE) -> ChatResult:
        generations = []
--- a/libs/partners/huggingface/langchain_huggingface/embeddings/huggingface.py
+++ b/libs/partners/huggingface/langchain_huggingface/embeddings/huggingface.py
@@ -1,7 +1,7 @@
 from typing import Any, Dict, List, Optional  # type: ignore[import-not-found]

 from langchain_core.embeddings import Embeddings
-from langchain_core.pydantic_v1 import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field

 DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

@@ -62,10 +62,10 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
            self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
        )

-    class Config:
-        """Configuration for this pydantic object."""
-
-        extra = "forbid"
+    model_config = ConfigDict(
+        extra="forbid",
+        protected_namespaces=(),
+    )

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.
--- a/libs/partners/huggingface/langchain_huggingface/embeddings/huggingface_endpoint.py
+++ b/libs/partners/huggingface/langchain_huggingface/embeddings/huggingface_endpoint.py
@@ -1,9 +1,11 @@
 import json
-from typing import Any, Dict, List, Optional
+import os
+from typing import Any, List, Optional

 from langchain_core.embeddings import Embeddings
-from langchain_core.pydantic_v1 import BaseModel, root_validator
-from langchain_core.utils import get_from_dict_or_env
+from langchain_core.utils import from_env
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+from typing_extensions import Self

 DEFAULT_MODEL = "sentence-transformers/all-mpnet-base-v2"
 VALID_TASKS = ("feature-extraction",)
@@ -39,22 +41,20 @@ class HuggingFaceEndpointEmbeddings(BaseModel, Embeddings):
    model_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model."""

-    huggingfacehub_api_token: Optional[str] = None
+    huggingfacehub_api_token: Optional[str] = Field(
+        default_factory=from_env("HUGGINGFACEHUB_API_TOKEN", default=None)
+    )

-    class Config:
-        """Configuration for this pydantic object."""
+    model_config = ConfigDict(
+        extra="forbid",
+        protected_namespaces=(),
+    )

-        extra = "forbid"
-
-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_environment(cls, values: Dict) -> Dict:
+    @model_validator(mode="after")
+    def validate_environment(self) -> Self:
        """Validate that api key and python package exists in environment."""
-        values["huggingfacehub_api_token"] = get_from_dict_or_env(
-            values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN", None
-        )
-
-        huggingfacehub_api_token = get_from_dict_or_env(
-            values, "huggingfacehub_api_token", "HF_TOKEN", None
+        huggingfacehub_api_token = self.huggingfacehub_api_token or os.getenv(
+            "HF_TOKEN"
        )

        try:
@@ -63,38 +63,38 @@ class HuggingFaceEndpointEmbeddings(BaseModel, Embeddings):
                InferenceClient,
            )

-            if values["model"]:
-                values["repo_id"] = values["model"]
-            elif values["repo_id"]:
-                values["model"] = values["repo_id"]
+            if self.model:
+                self.repo_id = self.model
+            elif self.repo_id:
+                self.model = self.repo_id
            else:
-                values["model"] = DEFAULT_MODEL
-                values["repo_id"] = DEFAULT_MODEL
+                self.model = DEFAULT_MODEL
+                self.repo_id = DEFAULT_MODEL

            client = InferenceClient(
-                model=values["model"],
+                model=self.model,
                token=huggingfacehub_api_token,
            )

            async_client = AsyncInferenceClient(
-                model=values["model"],
+                model=self.model,
                token=huggingfacehub_api_token,
            )

-            if values["task"] not in VALID_TASKS:
+            if self.task not in VALID_TASKS:
                raise ValueError(
-                    f"Got invalid task {values['task']}, "
+                    f"Got invalid task {self.task}, "
                    f"currently only {VALID_TASKS} are supported"
                )
-            values["client"] = client
-            values["async_client"] = async_client
+            self.client = client
+            self.async_client = async_client

        except ImportError:
            raise ImportError(
                "Could not import huggingface_hub python package. "
                "Please install it with `pip install huggingface_hub`."
            )
-        return values
+        return self

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Call out to HuggingFaceHub's embedding endpoint for embedding search docs.
--- a/libs/partners/huggingface/langchain_huggingface/llms/huggingface_endpoint.py
+++ b/libs/partners/huggingface/langchain_huggingface/llms/huggingface_endpoint.py
@@ -9,8 +9,9 @@ from langchain_core.callbacks import (
 )
 from langchain_core.language_models.llms import LLM
 from langchain_core.outputs import GenerationChunk
-from langchain_core.pydantic_v1 import Field, root_validator
-from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names
+from langchain_core.utils import from_env, get_pydantic_field_names
+from pydantic import ConfigDict, Field, model_validator
+from typing_extensions import Self

 logger = logging.getLogger(__name__)

@@ -71,7 +72,9 @@ class HuggingFaceEndpoint(LLM):
    should be pass as env variable in `HF_INFERENCE_ENDPOINT`"""
    repo_id: Optional[str] = None
    """Repo to use. If endpoint_url is not specified then this needs to given"""
-    huggingfacehub_api_token: Optional[str] = None
+    huggingfacehub_api_token: Optional[str] = Field(
+        default_factory=from_env("HUGGINGFACEHUB_API_TOKEN", default=None)
+    )
    max_new_tokens: int = 512
    """Maximum number of generated tokens"""
    top_k: Optional[int] = None
@@ -118,13 +121,13 @@ class HuggingFaceEndpoint(LLM):
    """Task to call the model with.
    Should be a task that returns `generated_text` or `summary_text`."""

-    class Config:
-        """Configuration for this pydantic object."""
+    model_config = ConfigDict(
+        extra="forbid",
+    )

-        extra = "forbid"
-
-    @root_validator(pre=True)
-    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    @model_validator(mode="before")
+    @classmethod
+    def build_extra(cls, values: Dict[str, Any]) -> Any:
        """Build extra kwargs from additional params that were passed in."""
        all_required_field_names = get_pydantic_field_names(cls)
        extra = values.get("model_kwargs", {})
@@ -182,8 +185,8 @@ class HuggingFaceEndpoint(LLM):
            )
        return values

-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_environment(cls, values: Dict) -> Dict:
+    @model_validator(mode="after")
+    def validate_environment(self) -> Self:
        """Validate that package is installed and that the API token is valid."""
        try:
            from huggingface_hub import login  # type: ignore[import]
@@ -194,12 +197,8 @@ class HuggingFaceEndpoint(LLM):
                "Please install it with `pip install huggingface_hub`."
            )

-        values["huggingfacehub_api_token"] = get_from_dict_or_env(
-            values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN", None
-        )
-
-        huggingfacehub_api_token = get_from_dict_or_env(
-            values, "huggingfacehub_api_token", "HF_TOKEN", None
+        huggingfacehub_api_token = self.huggingfacehub_api_token or os.getenv(
+            "HF_TOKEN"
        )

        if huggingfacehub_api_token is not None:
@@ -213,20 +212,20 @@ class HuggingFaceEndpoint(LLM):

        from huggingface_hub import AsyncInferenceClient, InferenceClient

-        values["client"] = InferenceClient(
-            model=values["model"],
-            timeout=values["timeout"],
+        self.client = InferenceClient(
+            model=self.model,
+            timeout=self.timeout,
            token=huggingfacehub_api_token,
-            **values["server_kwargs"],
+            **self.server_kwargs,
        )
-        values["async_client"] = AsyncInferenceClient(
-            model=values["model"],
-            timeout=values["timeout"],
+        self.async_client = AsyncInferenceClient(
+            model=self.model,
+            timeout=self.timeout,
            token=huggingfacehub_api_token,
-            **values["server_kwargs"],
+            **self.server_kwargs,
        )

-        return values
+        return self

    @property
    def _default_params(self) -> Dict[str, Any]:
--- a/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py
+++ b/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py
@@ -7,6 +7,7 @@ from typing import Any, Iterator, List, Mapping, Optional
 from langchain_core.callbacks import CallbackManagerForLLMRun
 from langchain_core.language_models.llms import BaseLLM
 from langchain_core.outputs import Generation, GenerationChunk, LLMResult
+from pydantic import ConfigDict

 DEFAULT_MODEL_ID = "gpt2"
 DEFAULT_TASK = "text-generation"
@@ -63,10 +64,9 @@ class HuggingFacePipeline(BaseLLM):
    batch_size: int = DEFAULT_BATCH_SIZE
    """Batch size to use when passing multiple documents to generate."""

-    class Config:
-        """Configuration for this pydantic object."""
-
-        extra = "forbid"
+    model_config = ConfigDict(
+        extra="forbid",
+    )

    @classmethod
    def from_model_id(