partners: (langchain-huggingface) Chat Models - Integrate Hugging Face Inference Providers and remove deprecated code (#30733)

Hi there, I'm Célina from 🤗.
This PR introduces support for Hugging Face's serverless Inference
Providers (documentation
[here](https://huggingface.co/docs/inference-providers/index)), allowing
users to specify different providers for chat completion and text
generation tasks.
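
For illustration, a minimal sketch of selecting a provider (the model and provider names below are taken from this PR's integration tests, not requirements):

```python
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Route requests through a specific inference provider;
# omitting `provider` falls back to "hf-inference" (the HF Inference API).
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-72B-Instruct",
    task="conversational",
    provider="fireworks-ai",
)
chat = ChatHuggingFace(llm=llm)
print(chat.invoke("Hello!").content)
```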

This PR also removes the usage of the `InferenceClient.post()` method in
`HuggingFaceEndpoint`, in favor of the task-specific `text_generation`
method: `InferenceClient.post()` is deprecated and will be removed in
`huggingface_hub` v0.31.0.
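
Roughly, the client-level change looks like this (a sketch; the model name, prompt, and parameters are placeholders):

```python
from huggingface_hub import InferenceClient

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

# Before: raw post() call (deprecated, removed in huggingface_hub v0.31.0)
# response = client.post(json={"inputs": "What is Deep Learning?"})

# After: the task-specific helper that HuggingFaceEndpoint now calls
completion = client.text_generation("What is Deep Learning?", max_new_tokens=64)
print(completion)
```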

---
## Changes made
- Bumped the minimum required version of the `huggingface-hub` package
to ensure compatibility with the latest API usage.
- Added a `provider` field to `HuggingFaceEndpoint`, enabling users to
select the inference provider (e.g., `cerebras`, `together`,
`fireworks-ai`), as illustrated above. Defaults to `hf-inference` (the HF
Inference API).
- Replaced the deprecated `InferenceClient.post()` call in
`HuggingFaceEndpoint` with the task-specific `text_generation` method
for future-proofing; `post()` will be removed in `huggingface-hub`
v0.31.0.
- Updated the `ChatHuggingFace` component (see the sketch after this list):
    - added async and streaming support.
    - added support for tool calling.
    - exposed underlying chat completion parameters for more granular
control.
- Added integration tests for `ChatHuggingFace` and updated the
corresponding unit tests.

All changes are backward compatible.
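
As a quick sketch of the new `ChatHuggingFace` surface (the endpoint configuration mirrors the integration tests below; the `add` tool is a toy example):

```python
from langchain_core.tools import tool
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-72B-Instruct",
    task="conversational",
    provider="fireworks-ai",
)
chat = ChatHuggingFace(llm=llm)

# Tool calling: the model emits structured tool calls
msg = chat.bind_tools([add]).invoke("What is 2 + 3? Use the add tool.")
print(msg.tool_calls)

# Streaming: chunks arrive incrementally
for chunk in chat.stream("Write a haiku about inference providers."):
    print(chunk.content, end="", flush=True)
```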

---------

Co-authored-by: ccurme <chester.curme@gmail.com>

```diff
@@ -15,70 +15,39 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
     @property
     def chat_model_params(self) -> dict:
-        return {}
+        llm = HuggingFaceEndpoint(  # type: ignore[call-arg]
+            repo_id="Qwen/Qwen2.5-72B-Instruct",
+            task="conversational",
+            provider="fireworks-ai",
+            temperature=0,
+        )
+        return {"llm": llm}
 
     @pytest.fixture
     def model(self) -> BaseChatModel:
-        llm = HuggingFaceEndpoint(  # type: ignore[call-arg]
-            repo_id="HuggingFaceH4/zephyr-7b-beta",
-            task="text-generation",
-            max_new_tokens=512,
-            do_sample=False,
-            repetition_penalty=1.03,
-        )
-        return self.chat_model_class(llm=llm)  # type: ignore[call-arg]
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_stream(self, model: BaseChatModel) -> None:
-        super().test_stream(model)
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    async def test_astream(self, model: BaseChatModel) -> None:
-        await super().test_astream(model)
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_usage_metadata(self, model: BaseChatModel) -> None:
-        super().test_usage_metadata(model)
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
-        super().test_usage_metadata_streaming(model)
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_stop_sequence(self, model: BaseChatModel) -> None:
-        super().test_stop_sequence(model)
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_tool_calling(self, model: BaseChatModel) -> None:
-        super().test_tool_calling(model)
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    async def test_tool_calling_async(self, model: BaseChatModel) -> None:
-        await super().test_tool_calling_async(model)
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
-        super().test_tool_calling_with_no_arguments(model)
-
-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
-        super().test_bind_runnables_as_tools(model)
+        return self.chat_model_class(**self.chat_model_params)  # type: ignore[call-arg]
 
-    @pytest.mark.xfail(reason=("Not implemented"))
+    @pytest.mark.xfail(
+        reason=("Overrding, testing only typed dict and json schema structured output")
+    )
     @pytest.mark.parametrize("schema_type", ["typeddict", "json_schema"])
     def test_structured_output(self, model: BaseChatModel, schema_type: str) -> None:
         super().test_structured_output(model, schema_type)
 
-    @pytest.mark.xfail(reason=("Not implemented"))
+    @pytest.mark.xfail(
+        reason=("Overrding, testing only typed dict and json schema structured output")
+    )
     @pytest.mark.parametrize("schema_type", ["typeddict", "json_schema"])
     async def test_structured_output_async(
         self, model: BaseChatModel, schema_type: str
     ) -> None:  # type: ignore[override]
         super().test_structured_output(model, schema_type)
 
-    @pytest.mark.xfail(reason=("Not implemented"))
+    @pytest.mark.xfail(reason=("Pydantic structured output is not supported"))
     def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
         super().test_structured_output_pydantic_2_v1(model)
 
-    @pytest.mark.xfail(reason=("Not implemented"))
+    @pytest.mark.xfail(reason=("Pydantic structured output is not supported"))
     def test_structured_output_optional_param(self, model: BaseChatModel) -> None:
         super().test_structured_output_optional_param(model)
@@ -95,3 +64,7 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
         self, model: BaseChatModel, my_adder_tool: BaseTool
     ) -> None:
         super().test_structured_few_shot_examples(model, my_adder_tool=my_adder_tool)
+
+    @property
+    def has_tool_choice(self) -> bool:
+        return False
```