enable the device_map parameter in huggingface pipeline (#12731)
### Enabling `device_map` in `HuggingFacePipeline`

For multi-GPU settings with large models, the [accelerate](https://huggingface.co/docs/accelerate/usage_guides/big_modeling#using--accelerate) library provides the `device_map` parameter to automatically distribute the model across GPUs / disk. The [Transformers pipeline](3520e37e86/src/transformers/pipelines/__init__.py) (L543) lets users specify either `device` or `device_map`, and handles the case where both are specified (with warnings). However, LangChain's `HuggingFacePipeline` only supports specifying `device` when calling transformers, which blocks large-model and multi-GPU use cases. Additionally, the [default value](8bd3ce59cd/libs/langchain/langchain/llms/huggingface_pipeline.py) (L72) of `device` is `-1`, which is incompatible with the transformers pipeline when `device_map` is specified.

This PR adds `device_map` as a parameter and resolves the incompatibility of `device = -1` when `device_map` is also specified. A test has been added for the new feature.

Some existing tests also needed fixes, because:

1. `max_new_tokens` has to be specified under `pipeline_kwargs`, not `model_kwargs`.
2. The GPT2 tokenizer raises `ValueError: Pipeline with tokenizer without pad_token cannot do batching`, since `tokenizer.pad_token` is `None` ([related issue](https://github.com/huggingface/transformers/issues/19853) on the transformers repo).

This PR fixes those tests as well.

Co-authored-by: Praveen Venkateswaran <praveen.venkateswaran@ibm.com>
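After this change, distributing a large model across the available hardware can be requested directly when constructing the LLM. A minimal usage sketch (mirroring the new integration test; `gpt2` is only an illustrative model id, and `device_map="auto"` assumes the `accelerate` package is installed):

```python
from langchain.llms import HuggingFacePipeline

# Let accelerate decide how to shard the model across available GPUs / CPU / disk.
llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2",                         # illustrative; any supported model id works
    task="text-generation",
    device_map="auto",                       # leave `device` at its default; -1 is mapped to None internally
    pipeline_kwargs={"max_new_tokens": 10},  # generation args go in pipeline_kwargs, not model_kwargs
)

print(llm("Say foo:"))
```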
This commit is contained in: parent 3276aa3e17 · commit 21eeba075c
libs/langchain/langchain/llms/huggingface_pipeline.py

@@ -70,6 +70,7 @@ class HuggingFacePipeline(BaseLLM):
         model_id: str,
         task: str,
         device: Optional[int] = -1,
+        device_map: Optional[str] = None,
         model_kwargs: Optional[dict] = None,
         pipeline_kwargs: Optional[dict] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
@@ -108,6 +109,9 @@ class HuggingFacePipeline(BaseLLM):
                 f"Could not load the {task} model due to missing dependencies."
             ) from e
 
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token_id = model.config.eos_token_id
+
         if (
             getattr(model, "is_loaded_in_4bit", False)
             or getattr(model, "is_loaded_in_8bit", False)
@@ -129,7 +133,9 @@ class HuggingFacePipeline(BaseLLM):
                     f"Got device=={device}, "
                     f"device is required to be within [-1, {cuda_device_count})"
                 )
-            if device < 0 and cuda_device_count > 0:
+            if device_map is not None and device < 0:
+                device = None
+            if device is not None and device < 0 and cuda_device_count > 0:
                 logger.warning(
                     "Device has %d GPUs available. "
                     "Provide device={deviceId} to `from_model_id` to use available"
@@ -147,6 +153,7 @@ class HuggingFacePipeline(BaseLLM):
             model=model,
             tokenizer=tokenizer,
             device=device,
+            device_map=device_map,
             batch_size=batch_size,
             model_kwargs=_model_kwargs,
             **_pipeline_kwargs,
libs/langchain/tests/integration_tests/llms/test_huggingface_pipeline.py (16 changed lines, Normal file → Executable file)
@@ -10,7 +10,7 @@ from tests.integration_tests.llms.utils import assert_llm_equality
 def test_huggingface_pipeline_text_generation() -> None:
     """Test valid call to HuggingFace text generation model."""
     llm = HuggingFacePipeline.from_model_id(
-        model_id="gpt2", task="text-generation", model_kwargs={"max_new_tokens": 10}
+        model_id="gpt2", task="text-generation", pipeline_kwargs={"max_new_tokens": 10}
     )
     output = llm("Say foo:")
     assert isinstance(output, str)
@@ -25,6 +25,18 @@ def test_huggingface_pipeline_text2text_generation() -> None:
     assert isinstance(output, str)
 
 
+def test_huggingface_pipeline_device_map() -> None:
+    """Test pipelines specifying the device map parameter."""
+    llm = HuggingFacePipeline.from_model_id(
+        model_id="gpt2",
+        task="text-generation",
+        device_map="auto",
+        pipeline_kwargs={"max_new_tokens": 10},
+    )
+    output = llm("Say foo:")
+    assert isinstance(output, str)
+
+
 def text_huggingface_pipeline_summarization() -> None:
     """Test valid call to HuggingFace summarization model."""
     llm = HuggingFacePipeline.from_model_id(
@@ -37,7 +49,7 @@ def text_huggingface_pipeline_summarization() -> None:
 def test_saving_loading_llm(tmp_path: Path) -> None:
     """Test saving/loading an HuggingFaceHub LLM."""
     llm = HuggingFacePipeline.from_model_id(
-        model_id="gpt2", task="text-generation", model_kwargs={"max_new_tokens": 10}
+        model_id="gpt2", task="text-generation", pipeline_kwargs={"max_new_tokens": 10}
     )
     llm.save(file_path=tmp_path / "hf.yaml")
     loaded_llm = load_llm(tmp_path / "hf.yaml")
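As background for the `pad_token` guard added in `huggingface_pipeline.py` above, the sketch below shows the failure it avoids when using the transformers pipeline directly (model id, prompts, and batch size are illustrative):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# GPT2 ships without a pad token; with batch_size > 1 the pipeline would raise
# "ValueError: Pipeline with tokenizer without pad_token cannot do batching".
if tokenizer.pad_token is None:
    tokenizer.pad_token_id = model.config.eos_token_id

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    batch_size=2,
)
print(pipe(["Say foo:", "Say bar:"], max_new_tokens=10))
```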