community: Fixed the procedure of initializing pad_token_id (#29434)

- **Description:** Check the model config's `pad_token_id` and `eos_token_id`
before falling back when initializing the tokenizer's `pad_token_id`. This
appears to be the same bug that was fixed in HuggingFace TGI. The source code of
libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py
also requires similar changes.
- **Issue:** #29431
- **Dependencies:** none
- **Twitter handle:** tell14
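The fallback order the patch introduces can be sketched in isolation. This is an illustrative sketch, not the patched function itself: `resolve_pad_token_id` is a hypothetical helper, and the `SimpleNamespace` objects stand in for the real `tokenizer` and `model.config` from transformers.

```python
from types import SimpleNamespace


def resolve_pad_token_id(tokenizer, model_config):
    """Pick a pad_token_id using the same precedence as the patch."""
    if tokenizer.pad_token is not None:
        return tokenizer.pad_token_id  # already set; nothing to do
    # 1. Prefer an explicit pad_token_id on the model config.
    if model_config.pad_token_id is not None:
        return model_config.pad_token_id
    # 2. Fall back to the config's eos_token_id, but only if it is a
    #    single int (some configs store a list of eos token ids).
    if model_config.eos_token_id is not None and isinstance(
        model_config.eos_token_id, int
    ):
        return model_config.eos_token_id
    # 3. Fall back to the tokenizer's own eos_token_id.
    if tokenizer.eos_token_id is not None:
        return tokenizer.eos_token_id
    # 4. Nothing usable: the patch adds a new "[PAD]" special token here.
    return None


tok = SimpleNamespace(pad_token=None, pad_token_id=None, eos_token_id=2)
cfg = SimpleNamespace(pad_token_id=None, eos_token_id=[2, 3])  # list, so skipped
print(resolve_pad_token_id(tok, cfg))  # falls through to tokenizer.eos_token_id
```

Note that step 2's `isinstance(..., int)` guard is what distinguishes this from the pre-patch behavior, which assigned `model.config.eos_token_id` unconditionally, even when it was a list or `None`.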
Teruaki Ishizaki 2025-01-28 04:54:54 +09:00 committed by GitHub
parent dbb6b7b103
commit 3fce78994e


@@ -169,7 +169,16 @@ class HuggingFacePipeline(BaseLLM):
             ) from e
         if tokenizer.pad_token is None:
-            tokenizer.pad_token_id = model.config.eos_token_id
+            if model.config.pad_token_id is not None:
+                tokenizer.pad_token_id = model.config.pad_token_id
+            elif model.config.eos_token_id is not None and isinstance(
+                model.config.eos_token_id, int
+            ):
+                tokenizer.pad_token_id = model.config.eos_token_id
+            elif tokenizer.eos_token_id is not None:
+                tokenizer.pad_token_id = tokenizer.eos_token_id
+            else:
+                tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         if (
             (