multiple: pydantic 2 compatibility, v0.3 (#26443)

Signed-off-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com>
Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com>
Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: ccurme <chester.curme@gmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Tomaz Bratanic <bratanic.tomaz@gmail.com>
Co-authored-by: ZhangShenao <15201440436@163.com>
Co-authored-by: Friso H. Kingma <fhkingma@gmail.com>
Co-authored-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Nuno Campos <nuno@langchain.dev>
Co-authored-by: Morgante Pell <morgantep@google.com>
This commit is contained in:
Erick Friis
2024-09-13 14:38:45 -07:00
committed by GitHub
parent d9813bdbbc
commit c2a3021bb0
1402 changed files with 38318 additions and 30410 deletions

View File

@@ -1,7 +1,7 @@
from typing import Any, Dict, List, Optional # type: ignore[import-not-found]
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
@@ -26,7 +26,7 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
)
"""
client: Any #: :meta private:
client: Any = None #: :meta private:
model_name: str = DEFAULT_MODEL_NAME
"""Model name to use."""
cache_folder: Optional[str] = None
@@ -51,7 +51,6 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
super().__init__(**kwargs)
try:
import sentence_transformers # type: ignore[import]
except ImportError as exc:
raise ImportError(
"Could not import sentence_transformers python package. "
@@ -62,10 +61,10 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
)
class Config:
"""Configuration for this pydantic object."""
extra = "forbid"
model_config = ConfigDict(
extra="forbid",
protected_namespaces=(),
)
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a HuggingFace transformer model.

View File

@@ -1,9 +1,11 @@
import json
from typing import Any, Dict, List, Optional
import os
from typing import Any, List, Optional
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, root_validator
from langchain_core.utils import get_from_dict_or_env
from langchain_core.utils import from_env
from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing_extensions import Self
DEFAULT_MODEL = "sentence-transformers/all-mpnet-base-v2"
VALID_TASKS = ("feature-extraction",)
@@ -28,8 +30,8 @@ class HuggingFaceEndpointEmbeddings(BaseModel, Embeddings):
)
"""
client: Any #: :meta private:
async_client: Any #: :meta private:
client: Any = None #: :meta private:
async_client: Any = None #: :meta private:
model: Optional[str] = None
"""Model name to use."""
repo_id: Optional[str] = None
@@ -39,22 +41,20 @@ class HuggingFaceEndpointEmbeddings(BaseModel, Embeddings):
model_kwargs: Optional[dict] = None
"""Keyword arguments to pass to the model."""
huggingfacehub_api_token: Optional[str] = None
huggingfacehub_api_token: Optional[str] = Field(
default_factory=from_env("HUGGINGFACEHUB_API_TOKEN", default=None)
)
class Config:
"""Configuration for this pydantic object."""
model_config = ConfigDict(
extra="forbid",
protected_namespaces=(),
)
extra = "forbid"
@root_validator(pre=False, skip_on_failure=True)
def validate_environment(cls, values: Dict) -> Dict:
@model_validator(mode="after")
def validate_environment(self) -> Self:
"""Validate that api key and python package exists in environment."""
values["huggingfacehub_api_token"] = get_from_dict_or_env(
values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN", None
)
huggingfacehub_api_token = get_from_dict_or_env(
values, "huggingfacehub_api_token", "HF_TOKEN", None
huggingfacehub_api_token = self.huggingfacehub_api_token or os.getenv(
"HF_TOKEN"
)
try:
@@ -63,38 +63,38 @@ class HuggingFaceEndpointEmbeddings(BaseModel, Embeddings):
InferenceClient,
)
if values["model"]:
values["repo_id"] = values["model"]
elif values["repo_id"]:
values["model"] = values["repo_id"]
if self.model:
self.repo_id = self.model
elif self.repo_id:
self.model = self.repo_id
else:
values["model"] = DEFAULT_MODEL
values["repo_id"] = DEFAULT_MODEL
self.model = DEFAULT_MODEL
self.repo_id = DEFAULT_MODEL
client = InferenceClient(
model=values["model"],
model=self.model,
token=huggingfacehub_api_token,
)
async_client = AsyncInferenceClient(
model=values["model"],
model=self.model,
token=huggingfacehub_api_token,
)
if values["task"] not in VALID_TASKS:
if self.task not in VALID_TASKS:
raise ValueError(
f"Got invalid task {values['task']}, "
f"Got invalid task {self.task}, "
f"currently only {VALID_TASKS} are supported"
)
values["client"] = client
values["async_client"] = async_client
self.client = client
self.async_client = async_client
except ImportError:
raise ImportError(
"Could not import huggingface_hub python package. "
"Please install it with `pip install huggingface_hub`."
)
return values
return self
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Call out to HuggingFaceHub's embedding endpoint for embedding search docs.