huggingface: fix HuggingFaceEndpoint._astream() got multiple values for argument 'stop' (#31385)

This commit is contained in:
m27315 2025-07-06 10:18:53 -05:00 committed by GitHub
parent e934788ca2
commit 013ce2c47f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -78,13 +78,13 @@ class HuggingFaceEndpoint(LLM):
""" # noqa: E501 """ # noqa: E501
endpoint_url: Optional[str] = None endpoint_url: Optional[str] = None
"""Endpoint URL to use. If repo_id is not specified then this needs to be given or """Endpoint URL to use. If repo_id is not specified then this needs to be given or
should be passed as an env variable in `HF_INFERENCE_ENDPOINT`""" should be passed as an env variable in `HF_INFERENCE_ENDPOINT`"""
repo_id: Optional[str] = None repo_id: Optional[str] = None
"""Repo to use. If endpoint_url is not specified then this needs to be given""" """Repo to use. If endpoint_url is not specified then this needs to be given"""
provider: Optional[str] = None provider: Optional[str] = None
"""Name of the provider to use for inference with the model specified in `repo_id`. """Name of the provider to use for inference with the model specified in `repo_id`.
e.g. "cerebras". If not specified, defaults to "auto", i.e. the first of the e.g. "cerebras". If not specified, defaults to "auto", i.e. the first of the
providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers. providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
available providers can be found in the [huggingface_hub documentation](https://huggingface.co/docs/huggingface_hub/guides/inference#supported-providers-and-tasks).""" available providers can be found in the [huggingface_hub documentation](https://huggingface.co/docs/huggingface_hub/guides/inference#supported-providers-and-tasks)."""
huggingfacehub_api_token: Optional[str] = Field( huggingfacehub_api_token: Optional[str] = Field(
@ -305,7 +305,9 @@ class HuggingFaceEndpoint(LLM):
invocation_params = self._invocation_params(stop, **kwargs) invocation_params = self._invocation_params(stop, **kwargs)
if self.streaming: if self.streaming:
completion = "" completion = ""
for chunk in self._stream(prompt, stop, run_manager, **invocation_params): for chunk in self._stream(
prompt, run_manager=run_manager, **invocation_params
):
completion += chunk.text completion += chunk.text
return completion return completion
else: else:
@ -333,7 +335,7 @@ class HuggingFaceEndpoint(LLM):
if self.streaming: if self.streaming:
completion = "" completion = ""
async for chunk in self._astream( async for chunk in self._astream(
prompt, stop, run_manager, **invocation_params prompt, run_manager=run_manager, **invocation_params
): ):
completion += chunk.text completion += chunk.text
return completion return completion