From 6208773c77060c6604c267ad51dcc34000984fa9 Mon Sep 17 00:00:00 2001 From: ccurme Date: Tue, 10 Sep 2024 09:28:41 -0400 Subject: [PATCH] community[patch]: set protected namespaces on embeddings (#26156) Also fix serdes test for langchain-google-genai. --- .../langchain_community/embeddings/baichuan.py | 4 +--- .../langchain_community/embeddings/bedrock.py | 4 +--- .../langchain_community/embeddings/clarifai.py | 4 +--- .../embeddings/cloudflare_workersai.py | 4 +--- .../langchain_community/embeddings/deepinfra.py | 4 +--- .../langchain_community/embeddings/fastembed.py | 4 +--- .../langchain_community/embeddings/huggingface.py | 12 +++--------- .../embeddings/huggingface_hub.py | 4 +--- .../langchain_community/embeddings/ipex_llm.py | 4 +--- .../langchain_community/embeddings/localai.py | 4 +--- .../langchain_community/embeddings/modelscope_hub.py | 4 +--- .../embeddings/oci_generative_ai.py | 4 +--- .../langchain_community/embeddings/ollama.py | 4 +--- .../langchain_community/embeddings/openai.py | 3 +-- .../langchain_community/embeddings/openvino.py | 4 +--- .../langchain_community/embeddings/ovhcloud.py | 4 +--- .../embeddings/sagemaker_endpoint.py | 3 +-- .../embeddings/spacy_embeddings.py | 4 +--- .../langchain_community/embeddings/yandex.py | 4 +--- .../tests/unit_tests/load/test_serializable.py | 5 +++-- 20 files changed, 24 insertions(+), 63 deletions(-) diff --git a/libs/community/langchain_community/embeddings/baichuan.py b/libs/community/langchain_community/embeddings/baichuan.py index 68fc6d8d954..082ff369ed7 100644 --- a/libs/community/langchain_community/embeddings/baichuan.py +++ b/libs/community/langchain_community/embeddings/baichuan.py @@ -70,9 +70,7 @@ class BaichuanTextEmbeddings(BaseModel, Embeddings): chunk_size: int = 16 """Chunk size when multiple texts are input""" - model_config = ConfigDict( - populate_by_name=True, - ) + model_config = ConfigDict(populate_by_name=True, protected_namespaces=()) @model_validator(mode="after") def validate_environment(self) -> Self: diff --git a/libs/community/langchain_community/embeddings/bedrock.py b/libs/community/langchain_community/embeddings/bedrock.py index 671073e743f..7287e1acee9 100644 --- a/libs/community/langchain_community/embeddings/bedrock.py +++ b/libs/community/langchain_community/embeddings/bedrock.py @@ -75,9 +75,7 @@ class BedrockEmbeddings(BaseModel, Embeddings): normalize: bool = False """Whether the embeddings should be normalized to unit vectors""" - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) @model_validator(mode="after") def validate_environment(self) -> Self: diff --git a/libs/community/langchain_community/embeddings/clarifai.py b/libs/community/langchain_community/embeddings/clarifai.py index 4c35fcf75b8..e460020bef1 100644 --- a/libs/community/langchain_community/embeddings/clarifai.py +++ b/libs/community/langchain_community/embeddings/clarifai.py @@ -43,9 +43,7 @@ class ClarifaiEmbeddings(BaseModel, Embeddings): model: Any = Field(default=None, exclude=True) #: :meta private: api_base: str = "https://api.clarifai.com" - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) @model_validator(mode="before") @classmethod diff --git a/libs/community/langchain_community/embeddings/cloudflare_workersai.py b/libs/community/langchain_community/embeddings/cloudflare_workersai.py index 45fadb39c68..87d15c60992 100644 --- a/libs/community/langchain_community/embeddings/cloudflare_workersai.py +++ b/libs/community/langchain_community/embeddings/cloudflare_workersai.py @@ -43,9 +43,7 @@ class CloudflareWorkersAIEmbeddings(BaseModel, Embeddings): self.headers = {"Authorization": f"Bearer {self.api_token}"} - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using Cloudflare Workers AI. diff --git a/libs/community/langchain_community/embeddings/deepinfra.py b/libs/community/langchain_community/embeddings/deepinfra.py index 05f3ed982b4..d0d2c476011 100644 --- a/libs/community/langchain_community/embeddings/deepinfra.py +++ b/libs/community/langchain_community/embeddings/deepinfra.py @@ -54,9 +54,7 @@ class DeepInfraEmbeddings(BaseModel, Embeddings): batch_size: int = MAX_BATCH_SIZE """Batch size for embedding requests.""" - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) @pre_init def validate_environment(cls, values: Dict) -> Dict: diff --git a/libs/community/langchain_community/embeddings/fastembed.py b/libs/community/langchain_community/embeddings/fastembed.py index fe654aa4459..75dcbda805f 100644 --- a/libs/community/langchain_community/embeddings/fastembed.py +++ b/libs/community/langchain_community/embeddings/fastembed.py @@ -67,9 +67,7 @@ class FastEmbedEmbeddings(BaseModel, Embeddings): _model: Any # : :meta private: - model_config = ConfigDict( - extra="allow", - ) + model_config = ConfigDict(extra="allow", protected_namespaces=()) @pre_init def validate_environment(cls, values: Dict) -> Dict: diff --git a/libs/community/langchain_community/embeddings/huggingface.py b/libs/community/langchain_community/embeddings/huggingface.py index f4c2f38f8fd..e6eeadca580 100644 --- a/libs/community/langchain_community/embeddings/huggingface.py +++ b/libs/community/langchain_community/embeddings/huggingface.py @@ -93,9 +93,7 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings): self.model_name, cache_folder=self.cache_folder, **self.model_kwargs ) - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using a HuggingFace transformer model. @@ -209,9 +207,7 @@ class HuggingFaceInstructEmbeddings(BaseModel, Embeddings): ) self.show_progress = self.encode_kwargs.pop("show_progress_bar") - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using a HuggingFace instruct model. @@ -350,9 +346,7 @@ class HuggingFaceBgeEmbeddings(BaseModel, Embeddings): ) self.show_progress = self.encode_kwargs.pop("show_progress_bar") - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using a HuggingFace transformer model. diff --git a/libs/community/langchain_community/embeddings/huggingface_hub.py b/libs/community/langchain_community/embeddings/huggingface_hub.py index cea56ad11c7..33711181a19 100644 --- a/libs/community/langchain_community/embeddings/huggingface_hub.py +++ b/libs/community/langchain_community/embeddings/huggingface_hub.py @@ -48,9 +48,7 @@ class HuggingFaceHubEmbeddings(BaseModel, Embeddings): huggingfacehub_api_token: Optional[str] = None - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) @model_validator(mode="before") @classmethod diff --git a/libs/community/langchain_community/embeddings/ipex_llm.py b/libs/community/langchain_community/embeddings/ipex_llm.py index be18e2f3461..1f53cd7fd7b 100644 --- a/libs/community/langchain_community/embeddings/ipex_llm.py +++ b/libs/community/langchain_community/embeddings/ipex_llm.py @@ -106,9 +106,7 @@ class IpexLLMBgeEmbeddings(BaseModel, Embeddings): if "-zh" in self.model_name: self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using a HuggingFace transformer model. diff --git a/libs/community/langchain_community/embeddings/localai.py b/libs/community/langchain_community/embeddings/localai.py index b59433ad2e2..f4ca9fa9723 100644 --- a/libs/community/langchain_community/embeddings/localai.py +++ b/libs/community/langchain_community/embeddings/localai.py @@ -166,9 +166,7 @@ class LocalAIEmbeddings(BaseModel, Embeddings): model_kwargs: Dict[str, Any] = Field(default_factory=dict) """Holds any model parameters valid for `create` call not explicitly specified.""" - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) @model_validator(mode="before") @classmethod diff --git a/libs/community/langchain_community/embeddings/modelscope_hub.py b/libs/community/langchain_community/embeddings/modelscope_hub.py index 878f05a4fe2..a0c78633916 100644 --- a/libs/community/langchain_community/embeddings/modelscope_hub.py +++ b/libs/community/langchain_community/embeddings/modelscope_hub.py @@ -39,9 +39,7 @@ class ModelScopeEmbeddings(BaseModel, Embeddings): model_revision=self.model_revision, ) - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using a modelscope embedding model. diff --git a/libs/community/langchain_community/embeddings/oci_generative_ai.py b/libs/community/langchain_community/embeddings/oci_generative_ai.py index 72c4059d2be..10bef6d441d 100644 --- a/libs/community/langchain_community/embeddings/oci_generative_ai.py +++ b/libs/community/langchain_community/embeddings/oci_generative_ai.py @@ -85,9 +85,7 @@ class OCIGenAIEmbeddings(BaseModel, Embeddings): """Batch size of OCI GenAI embedding requests. OCI GenAI may handle up to 96 texts per request""" - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) @pre_init def validate_environment(cls, values: Dict) -> Dict: # pylint: disable=no-self-argument diff --git a/libs/community/langchain_community/embeddings/ollama.py b/libs/community/langchain_community/embeddings/ollama.py index 13be9a3cd84..b55cff9b6fa 100644 --- a/libs/community/langchain_community/embeddings/ollama.py +++ b/libs/community/langchain_community/embeddings/ollama.py @@ -141,9 +141,7 @@ class OllamaEmbeddings(BaseModel, Embeddings): """Get the identifying parameters.""" return {**{"model": self.model}, **self._default_params} - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def _process_emb_response(self, input: str) -> List[float]: """Process a response from the API. diff --git a/libs/community/langchain_community/embeddings/openai.py b/libs/community/langchain_community/embeddings/openai.py index a7abde76ea3..770bb765e81 100644 --- a/libs/community/langchain_community/embeddings/openai.py +++ b/libs/community/langchain_community/embeddings/openai.py @@ -255,8 +255,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): """Optional httpx.Client.""" model_config = ConfigDict( - populate_by_name=True, - extra="forbid", + populate_by_name=True, extra="forbid", protected_namespaces=() ) @model_validator(mode="before") diff --git a/libs/community/langchain_community/embeddings/openvino.py b/libs/community/langchain_community/embeddings/openvino.py index 8a83cfd1679..d58bf92d255 100644 --- a/libs/community/langchain_community/embeddings/openvino.py +++ b/libs/community/langchain_community/embeddings/openvino.py @@ -254,9 +254,7 @@ class OpenVINOEmbeddings(BaseModel, Embeddings): return all_embeddings - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using a HuggingFace transformer model. diff --git a/libs/community/langchain_community/embeddings/ovhcloud.py b/libs/community/langchain_community/embeddings/ovhcloud.py index 51b0dd2f34a..5786a761f49 100644 --- a/libs/community/langchain_community/embeddings/ovhcloud.py +++ b/libs/community/langchain_community/embeddings/ovhcloud.py @@ -23,9 +23,7 @@ class OVHCloudEmbeddings(BaseModel, Embeddings): """ OVHcloud AI Endpoints region""" region: str = "kepler" - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) def __init__(self, **kwargs: Any): super().__init__(**kwargs) diff --git a/libs/community/langchain_community/embeddings/sagemaker_endpoint.py b/libs/community/langchain_community/embeddings/sagemaker_endpoint.py index d5fa48f3b03..d69cbd92ea8 100644 --- a/libs/community/langchain_community/embeddings/sagemaker_endpoint.py +++ b/libs/community/langchain_community/embeddings/sagemaker_endpoint.py @@ -111,8 +111,7 @@ class SagemakerEndpointEmbeddings(BaseModel, Embeddings): """ model_config = ConfigDict( - arbitrary_types_allowed=True, - extra="forbid", + arbitrary_types_allowed=True, extra="forbid", protected_namespaces=() ) @pre_init diff --git a/libs/community/langchain_community/embeddings/spacy_embeddings.py b/libs/community/langchain_community/embeddings/spacy_embeddings.py index 63c71e07c3f..0413741c4df 100644 --- a/libs/community/langchain_community/embeddings/spacy_embeddings.py +++ b/libs/community/langchain_community/embeddings/spacy_embeddings.py @@ -22,9 +22,7 @@ class SpacyEmbeddings(BaseModel, Embeddings): model_name: str = "en_core_web_sm" nlp: Optional[Any] = None - model_config = ConfigDict( - extra="forbid", - ) + model_config = ConfigDict(extra="forbid", protected_namespaces=()) @model_validator(mode="before") @classmethod diff --git a/libs/community/langchain_community/embeddings/yandex.py b/libs/community/langchain_community/embeddings/yandex.py index fdc6dc6873e..59637bf1001 100644 --- a/libs/community/langchain_community/embeddings/yandex.py +++ b/libs/community/langchain_community/embeddings/yandex.py @@ -71,9 +71,7 @@ class YandexGPTEmbeddings(BaseModel, Embeddings): If you provide personal data, confidential information, disable logging.""" grpc_metadata: Sequence - model_config = ConfigDict( - populate_by_name=True, - ) + model_config = ConfigDict(populate_by_name=True, protected_namespaces=()) @pre_init def validate_environment(cls, values: Dict) -> Dict: diff --git a/libs/community/tests/unit_tests/load/test_serializable.py b/libs/community/tests/unit_tests/load/test_serializable.py index 35ea4b169d7..819d47d9a73 100644 --- a/libs/community/tests/unit_tests/load/test_serializable.py +++ b/libs/community/tests/unit_tests/load/test_serializable.py @@ -112,8 +112,9 @@ def test_serializable_mapping() -> None: "chat_models", "ChatGroq", ), - # TODO(0.3): For now we're skipping this test. Need to fix - # so that it only runs when langchain-aws is installed. + # TODO(0.3): For now we're skipping the below two tests. Need to fix + # so that it only runs when langchain-aws, langchain-google-genai + # are installed. ("langchain", "chat_models", "bedrock", "ChatBedrock"): ( "langchain_aws", "chat_models",