partners: Fixed the procedure of initializing pad_token_id (#29500)

- **Description:** Add checks of the model config's `pad_token_id` and
`eos_token_id` when initializing `pad_token_id`. This appears to be the same
bug as the HuggingFace TGI bug, and the same bug as #29434.
- **Issue:** #29431
- **Dependencies:** none
- **Twitter handle:** tell14

Example code is as follows:
```python
from langchain_huggingface.llms import HuggingFacePipeline

hf = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-3.2-3B-Instruct",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 10},
)

from langchain_core.prompts import PromptTemplate

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

chain = prompt | hf

question = "What is electroencephalography?"

print(chain.invoke({"question": question}))
```
This commit is contained in:
Teruaki Ishizaki
2025-02-04 11:40:33 +09:00
committed by GitHub
parent e8b91283ef
commit aeb42dc900
3 changed files with 23 additions and 9 deletions

View File

@@ -202,7 +202,16 @@ class HuggingFacePipeline(BaseLLM):
model = model_cls.from_pretrained(model_id, **_model_kwargs)
if tokenizer.pad_token is None:
tokenizer.pad_token_id = model.config.eos_token_id
if model.config.pad_token_id is not None:
tokenizer.pad_token_id = model.config.pad_token_id
elif model.config.eos_token_id is not None and isinstance(
model.config.eos_token_id, int
):
tokenizer.pad_token_id = model.config.eos_token_id
elif tokenizer.eos_token_id is not None:
tokenizer.pad_token_id = tokenizer.eos_token_id
else:
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
if (
(