chore(hf-text-gen): extract default params for reusing (#7929)

This PR extracts the common code (the default generation params) of
`HuggingFaceTextGenInference` into a single `_default_params` property, so the
sync, async, and streaming call paths no longer duplicate the same keyword list.
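
For context, a minimal sketch of the pattern this change applies. The names here (`TextGenSketch`, `build_params`, and the trimmed parameter set) are illustrative stand-ins, not the actual LangChain class:

from typing import Any, Dict, List, Optional

class TextGenSketch:
    # A few representative defaults; the real class carries more fields.
    max_new_tokens: int = 512
    temperature: float = 0.8
    seed: Optional[int] = None

    @property
    def _default_params(self) -> Dict[str, Any]:
        # Single source of truth for the defaults shared by every call path.
        return {
            "max_new_tokens": self.max_new_tokens,
            "temperature": self.temperature,
            "seed": self.seed,
        }

    def build_params(
        self, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> Dict[str, Any]:
        # Later keys override earlier ones in a dict literal, so the merge
        # order is: defaults < explicit stop sequences < caller kwargs.
        return {**self._default_params, "stop_sequences": stop, **kwargs}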

Co-authored-by: Junlin Zhou <jlzhou@zjuici.com>
Commit: 812a1643db (parent: 54e02e4392)
Author: Junlin Zhou
Date: 2023-07-20 21:49:12 +08:00 (committed by GitHub)


@@ -36,6 +36,8 @@ class HuggingFaceTextGenInference(LLM):
     - _call: Generates text based on a given prompt and stop sequences.
     - _acall: Async generates text based on a given prompt and stop sequences.
     - _llm_type: Returns the type of LLM.
+    - _default_params: Returns the default parameters for calling text generation
+      inference API.
     """

     """
@@ -123,6 +125,21 @@ class HuggingFaceTextGenInference(LLM):
         """Return type of llm."""
         return "huggingface_textgen_inference"

+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling text generation inference API."""
+        return {
+            "max_new_tokens": self.max_new_tokens,
+            "top_k": self.top_k,
+            "top_p": self.top_p,
+            "typical_p": self.typical_p,
+            "temperature": self.temperature,
+            "repetition_penalty": self.repetition_penalty,
+            "truncate": self.truncate,
+            "stop_sequences": self.stop_sequences,
+            "seed": self.seed,
+        }
+
     def _call(
         self,
         prompt: str,
@@ -138,15 +155,8 @@ class HuggingFaceTextGenInference(LLM):
         if not self.stream:
             res = self.client.generate(
                 prompt,
+                **self._default_params,
                 stop_sequences=stop,
-                max_new_tokens=self.max_new_tokens,
-                top_k=self.top_k,
-                top_p=self.top_p,
-                typical_p=self.typical_p,
-                temperature=self.temperature,
-                repetition_penalty=self.repetition_penalty,
-                truncate=self.truncate,
-                seed=self.seed,
                 **kwargs,
             )
             # remove stop sequences from the end of the generated text
@@ -163,15 +173,9 @@
                 run_manager.on_llm_new_token, verbose=self.verbose
             )
             params = {
+                **self._default_params,
                 "stop_sequences": stop,
-                "max_new_tokens": self.max_new_tokens,
-                "top_k": self.top_k,
-                "top_p": self.top_p,
-                "typical_p": self.typical_p,
-                "temperature": self.temperature,
-                "repetition_penalty": self.repetition_penalty,
-                "truncate": self.truncate,
-                "seed": self.seed,
+                **kwargs,
             }
             text = ""
             for res in self.client.generate_stream(prompt, **params):
@@ -204,15 +208,8 @@ class HuggingFaceTextGenInference(LLM):
         if not self.stream:
             res = await self.async_client.generate(
                 prompt,
+                **self._default_params,
                 stop_sequences=stop,
-                max_new_tokens=self.max_new_tokens,
-                top_k=self.top_k,
-                top_p=self.top_p,
-                typical_p=self.typical_p,
-                temperature=self.temperature,
-                repetition_penalty=self.repetition_penalty,
-                truncate=self.truncate,
-                seed=self.seed,
                 **kwargs,
             )
             # remove stop sequences from the end of the generated text
@@ -229,17 +226,8 @@ class HuggingFaceTextGenInference(LLM):
                 run_manager.on_llm_new_token, verbose=self.verbose
             )
             params = {
-                **{
-                    "stop_sequences": stop,
-                    "max_new_tokens": self.max_new_tokens,
-                    "top_k": self.top_k,
-                    "top_p": self.top_p,
-                    "typical_p": self.typical_p,
-                    "temperature": self.temperature,
-                    "repetition_penalty": self.repetition_penalty,
-                    "truncate": self.truncate,
-                    "seed": self.seed,
-                },
+                **self._default_params,
+                "stop_sequences": stop,
                 **kwargs,
             }
             text = ""