Mirror of https://github.com/hwchase17/langchain.git
huggingface[major]: upgrade pydantic (#26048)
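Migrates the langchain-huggingface package off the `langchain_core.pydantic_v1` compatibility shims and onto native pydantic v2 APIs: `@root_validator` becomes `@model_validator`, `class Config` becomes `model_config = ConfigDict(...)`, and API-token lookup moves from `get_from_dict_or_env` to `Field(default_factory=from_env(...))` with an `os.getenv("HF_TOKEN")` fallback. Touches ChatHuggingFace, HuggingFaceEmbeddings, HuggingFaceEndpointEmbeddings, HuggingFaceEndpoint, and HuggingFacePipeline.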
@@ -29,10 +29,11 @@ from langchain_core.messages import (
     ToolMessage,
 )
 from langchain_core.outputs import ChatGeneration, ChatResult, LLMResult
-from langchain_core.pydantic_v1 import root_validator
 from langchain_core.runnables import Runnable
 from langchain_core.tools import BaseTool
 from langchain_core.utils.function_calling import convert_to_openai_tool
+from pydantic import model_validator
+from typing_extensions import Self
 
 from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint
 from langchain_huggingface.llms.huggingface_pipeline import HuggingFacePipeline
@@ -265,7 +266,7 @@ class ChatHuggingFace(BaseChatModel):
     Tool calling:
         .. code-block:: python
 
-            from langchain_core.pydantic_v1 import BaseModel, Field
+            from pydantic import BaseModel, Field
 
             class GetWeather(BaseModel):
                 '''Get the current weather in a given location'''
@@ -325,20 +326,20 @@ class ChatHuggingFace(BaseChatModel):
             else self.tokenizer
         )
 
-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_llm(cls, values: dict) -> dict:
+    @model_validator(mode="after")
+    def validate_llm(self) -> Self:
         if (
-            not _is_huggingface_hub(values["llm"])
-            and not _is_huggingface_textgen_inference(values["llm"])
-            and not _is_huggingface_endpoint(values["llm"])
-            and not _is_huggingface_pipeline(values["llm"])
+            not _is_huggingface_hub(self.llm)
+            and not _is_huggingface_textgen_inference(self.llm)
+            and not _is_huggingface_endpoint(self.llm)
+            and not _is_huggingface_pipeline(self.llm)
         ):
             raise TypeError(
                 "Expected llm to be one of HuggingFaceTextGenInference, "
                 "HuggingFaceEndpoint, HuggingFaceHub, HuggingFacePipeline "
-                f"received {type(values['llm'])}"
+                f"received {type(self.llm)}"
             )
-        return values
+        return self
 
     def _create_chat_result(self, response: TGI_RESPONSE) -> ChatResult:
         generations = []
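The hunk above shows the core migration pattern of this commit: pydantic v1's `@root_validator(pre=False, skip_on_failure=True)` classmethod, which received and returned a `values` dict, becomes a `@model_validator(mode="after")` instance method that reads fields from `self` and returns `Self`. A minimal, self-contained sketch of the pattern, with an illustrative check standing in for the real `_is_huggingface_*` helpers:

from typing import Any

from pydantic import BaseModel, model_validator
from typing_extensions import Self


class LLMWrapper(BaseModel):
    llm: Any  # illustrative field; the real class wraps an LLM instance

    @model_validator(mode="after")
    def validate_llm(self) -> Self:
        # mode="after" runs on the constructed instance, so fields are
        # plain attributes instead of entries in a `values` dict.
        if not hasattr(self.llm, "invoke"):
            raise TypeError(f"Expected a runnable llm, received {type(self.llm)}")
        return self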
@@ -1,7 +1,7 @@
 from typing import Any, Dict, List, Optional  # type: ignore[import-not-found]
 
 from langchain_core.embeddings import Embeddings
-from langchain_core.pydantic_v1 import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
 
@@ -62,10 +62,10 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
             self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
         )
 
-    class Config:
-        """Configuration for this pydantic object."""
-
-        extra = "forbid"
+    model_config = ConfigDict(
+        extra="forbid",
+        protected_namespaces=(),
+    )
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
         """Compute doc embeddings using a HuggingFace transformer model.
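Besides replacing the v1 `class Config`, the new `model_config` sets `protected_namespaces=()`: pydantic v2 reserves the `model_` prefix by default and would otherwise warn about fields such as `model_name` and `model_kwargs`. A standalone sketch of the equivalent configuration (illustrative, not the full class):

from pydantic import BaseModel, ConfigDict


class Example(BaseModel):
    # Without protected_namespaces=(), pydantic v2 warns that
    # "model_name" shadows the protected "model_" namespace.
    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    model_name: str = "sentence-transformers/all-mpnet-base-v2"

With extra="forbid", unknown constructor kwargs still raise a ValidationError, matching the old `extra = "forbid"` behavior.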
@@ -1,9 +1,11 @@
 import json
-from typing import Any, Dict, List, Optional
+import os
+from typing import Any, List, Optional
 
 from langchain_core.embeddings import Embeddings
-from langchain_core.pydantic_v1 import BaseModel, root_validator
-from langchain_core.utils import get_from_dict_or_env
+from langchain_core.utils import from_env
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+from typing_extensions import Self
 
 DEFAULT_MODEL = "sentence-transformers/all-mpnet-base-v2"
 VALID_TASKS = ("feature-extraction",)
@@ -39,22 +41,20 @@ class HuggingFaceEndpointEmbeddings(BaseModel, Embeddings):
     model_kwargs: Optional[dict] = None
     """Keyword arguments to pass to the model."""
 
-    huggingfacehub_api_token: Optional[str] = None
+    huggingfacehub_api_token: Optional[str] = Field(
+        default_factory=from_env("HUGGINGFACEHUB_API_TOKEN", default=None)
+    )
 
-    class Config:
-        """Configuration for this pydantic object."""
-
-        extra = "forbid"
+    model_config = ConfigDict(
+        extra="forbid",
+        protected_namespaces=(),
+    )
 
-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_environment(cls, values: Dict) -> Dict:
+    @model_validator(mode="after")
+    def validate_environment(self) -> Self:
         """Validate that api key and python package exists in environment."""
-        values["huggingfacehub_api_token"] = get_from_dict_or_env(
-            values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN", None
-        )
-
-        huggingfacehub_api_token = get_from_dict_or_env(
-            values, "huggingfacehub_api_token", "HF_TOKEN", None
+        huggingfacehub_api_token = self.huggingfacehub_api_token or os.getenv(
+            "HF_TOKEN"
         )
 
         try:
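`from_env(...)` (from langchain_core.utils) returns a zero-argument callable suitable for `default_factory`, so the environment variable is read when the model is constructed rather than at import time. A rough standalone equivalent, for illustration only (the real helper has more options):

import os
from typing import Callable, Optional

from pydantic import BaseModel, Field


def from_env(key: str, *, default: Optional[str] = None) -> Callable[[], Optional[str]]:
    # Simplified stand-in for langchain_core.utils.from_env.
    return lambda: os.getenv(key, default)


class Settings(BaseModel):
    huggingfacehub_api_token: Optional[str] = Field(
        default_factory=from_env("HUGGINGFACEHUB_API_TOKEN", default=None)
    )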
@@ -63,38 +63,38 @@ class HuggingFaceEndpointEmbeddings(BaseModel, Embeddings):
                 InferenceClient,
             )
 
-            if values["model"]:
-                values["repo_id"] = values["model"]
-            elif values["repo_id"]:
-                values["model"] = values["repo_id"]
+            if self.model:
+                self.repo_id = self.model
+            elif self.repo_id:
+                self.model = self.repo_id
             else:
-                values["model"] = DEFAULT_MODEL
-                values["repo_id"] = DEFAULT_MODEL
+                self.model = DEFAULT_MODEL
+                self.repo_id = DEFAULT_MODEL
 
             client = InferenceClient(
-                model=values["model"],
+                model=self.model,
                 token=huggingfacehub_api_token,
             )
 
             async_client = AsyncInferenceClient(
-                model=values["model"],
+                model=self.model,
                 token=huggingfacehub_api_token,
             )
 
-            if values["task"] not in VALID_TASKS:
+            if self.task not in VALID_TASKS:
                 raise ValueError(
-                    f"Got invalid task {values['task']}, "
+                    f"Got invalid task {self.task}, "
                     f"currently only {VALID_TASKS} are supported"
                 )
-            values["client"] = client
-            values["async_client"] = async_client
+            self.client = client
+            self.async_client = async_client
 
         except ImportError:
             raise ImportError(
                 "Could not import huggingface_hub python package. "
                 "Please install it with `pip install huggingface_hub`."
             )
-        return values
+        return self
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
         """Call out to HuggingFaceHub's embedding endpoint for embedding search docs.
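Note that the after-mode validator here both normalizes the model/repo_id pair and assigns the constructed clients back onto `self`; that works because `mode="after"` operates on a real instance. A compact sketch of just the field-mirroring part, under that assumption:

from typing import Optional

from pydantic import BaseModel, model_validator
from typing_extensions import Self

DEFAULT_MODEL = "sentence-transformers/all-mpnet-base-v2"


class EndpointConfig(BaseModel):
    model: Optional[str] = None
    repo_id: Optional[str] = None

    @model_validator(mode="after")
    def mirror_fields(self) -> Self:
        # Keep model and repo_id in sync, falling back to a default.
        if self.model:
            self.repo_id = self.model
        elif self.repo_id:
            self.model = self.repo_id
        else:
            self.model = DEFAULT_MODEL
            self.repo_id = DEFAULT_MODEL
        return self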
@@ -9,8 +9,9 @@ from langchain_core.callbacks import (
 )
 from langchain_core.language_models.llms import LLM
 from langchain_core.outputs import GenerationChunk
-from langchain_core.pydantic_v1 import Field, root_validator
-from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names
+from langchain_core.utils import from_env, get_pydantic_field_names
+from pydantic import ConfigDict, Field, model_validator
+from typing_extensions import Self
 
 logger = logging.getLogger(__name__)
 
@@ -71,7 +72,9 @@ class HuggingFaceEndpoint(LLM):
     should be pass as env variable in `HF_INFERENCE_ENDPOINT`"""
     repo_id: Optional[str] = None
     """Repo to use. If endpoint_url is not specified then this needs to given"""
-    huggingfacehub_api_token: Optional[str] = None
+    huggingfacehub_api_token: Optional[str] = Field(
+        default_factory=from_env("HUGGINGFACEHUB_API_TOKEN", default=None)
+    )
     max_new_tokens: int = 512
     """Maximum number of generated tokens"""
     top_k: Optional[int] = None
@@ -118,13 +121,13 @@ class HuggingFaceEndpoint(LLM):
     """Task to call the model with.
     Should be a task that returns `generated_text` or `summary_text`."""
 
-    class Config:
-        """Configuration for this pydantic object."""
-
-        extra = "forbid"
+    model_config = ConfigDict(
+        extra="forbid",
+    )
 
-    @root_validator(pre=True)
-    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    @model_validator(mode="before")
+    @classmethod
+    def build_extra(cls, values: Dict[str, Any]) -> Any:
         """Build extra kwargs from additional params that were passed in."""
         all_required_field_names = get_pydantic_field_names(cls)
         extra = values.get("model_kwargs", {})
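A v1 `@root_validator(pre=True)` maps to `@model_validator(mode="before")` plus an explicit `@classmethod`; it runs on the raw input dict before field validation, which is what lets build_extra sweep unrecognized kwargs into model_kwargs. A minimal sketch of that sweep (simplified, not the full build_extra logic):

from typing import Any, Dict

from pydantic import BaseModel, model_validator


class Endpoint(BaseModel):
    temperature: float = 0.8
    model_kwargs: Dict[str, Any] = {}

    @model_validator(mode="before")
    @classmethod
    def build_extra(cls, values: Dict[str, Any]) -> Any:
        # Move any keys that are not declared fields into model_kwargs.
        extra = values.get("model_kwargs", {})
        for name in list(values):
            if name not in cls.model_fields and name != "model_kwargs":
                extra[name] = values.pop(name)
        values["model_kwargs"] = extra
        return values

For example, Endpoint(temperature=0.5, top_p=0.9) would end up with model_kwargs == {"top_p": 0.9}.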
@@ -182,8 +185,8 @@ class HuggingFaceEndpoint(LLM):
         )
         return values
 
-    @root_validator(pre=False, skip_on_failure=True)
-    def validate_environment(cls, values: Dict) -> Dict:
+    @model_validator(mode="after")
+    def validate_environment(self) -> Self:
         """Validate that package is installed and that the API token is valid."""
         try:
             from huggingface_hub import login  # type: ignore[import]
@@ -194,12 +197,8 @@ class HuggingFaceEndpoint(LLM):
                 "Please install it with `pip install huggingface_hub`."
             )
 
-        values["huggingfacehub_api_token"] = get_from_dict_or_env(
-            values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN", None
-        )
-
-        huggingfacehub_api_token = get_from_dict_or_env(
-            values, "huggingfacehub_api_token", "HF_TOKEN", None
+        huggingfacehub_api_token = self.huggingfacehub_api_token or os.getenv(
+            "HF_TOKEN"
         )
 
         if huggingfacehub_api_token is not None:
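After this change the token is resolved in a fixed order: an explicitly passed huggingfacehub_api_token, else HUGGINGFACEHUB_API_TOKEN (read by the field's default_factory), else the HF_TOKEN environment variable at validation time. The same order expressed as a plain helper, for illustration only:

import os
from typing import Optional


def resolve_hf_token(explicit: Optional[str] = None) -> Optional[str]:
    # Mirrors the lookup order in validate_environment:
    # explicit argument > HUGGINGFACEHUB_API_TOKEN > HF_TOKEN.
    return (
        explicit
        or os.getenv("HUGGINGFACEHUB_API_TOKEN")
        or os.getenv("HF_TOKEN")
    )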
@@ -213,20 +212,20 @@ class HuggingFaceEndpoint(LLM):
 
         from huggingface_hub import AsyncInferenceClient, InferenceClient
 
-        values["client"] = InferenceClient(
-            model=values["model"],
-            timeout=values["timeout"],
+        self.client = InferenceClient(
+            model=self.model,
+            timeout=self.timeout,
             token=huggingfacehub_api_token,
-            **values["server_kwargs"],
+            **self.server_kwargs,
         )
-        values["async_client"] = AsyncInferenceClient(
-            model=values["model"],
-            timeout=values["timeout"],
+        self.async_client = AsyncInferenceClient(
+            model=self.model,
+            timeout=self.timeout,
             token=huggingfacehub_api_token,
-            **values["server_kwargs"],
+            **self.server_kwargs,
         )
 
-        return values
+        return self
 
     @property
     def _default_params(self) -> Dict[str, Any]:
@@ -7,6 +7,7 @@ from typing import Any, Iterator, List, Mapping, Optional
 from langchain_core.callbacks import CallbackManagerForLLMRun
 from langchain_core.language_models.llms import BaseLLM
 from langchain_core.outputs import Generation, GenerationChunk, LLMResult
+from pydantic import ConfigDict
 
 DEFAULT_MODEL_ID = "gpt2"
 DEFAULT_TASK = "text-generation"
@@ -63,10 +64,9 @@ class HuggingFacePipeline(BaseLLM):
     batch_size: int = DEFAULT_BATCH_SIZE
     """Batch size to use when passing multiple documents to generate."""
 
-    class Config:
-        """Configuration for this pydantic object."""
-
-        extra = "forbid"
+    model_config = ConfigDict(
+        extra="forbid",
+    )
 
     @classmethod
     def from_model_id(