community: add Intel GPU support to ipex-llm llm integration (#22458)

**Description:** [IPEX-LLM](https://github.com/intel-analytics/ipex-llm)
is a PyTorch library for running LLMs on Intel CPUs and GPUs (e.g., a local
PC with an iGPU, or a discrete GPU such as Arc, Flex, or Max) with very low
latency. This PR adds Intel GPU support to the `ipex-llm` LLM integration;
a usage sketch follows the file list below.
**Dependencies:** `ipex-llm`
**Contribution maintainers:** @ivy-lv11 @Oscilloscope98
**Tests and docs:**
- Add: langchain/docs/docs/integrations/llms/ipex_llm_gpu.ipynb
- Update: langchain/docs/docs/integrations/llms/ipex_llm.ipynb
- Update: langchain/libs/community/tests/llms/test_ipex_llm.py
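For reference, a minimal usage sketch of the new `device` option (the model id
and prompt are illustrative, not taken from this PR):

```python
from langchain_community.llms import IpexLLM

llm = IpexLLM.from_model_id(
    model_id="lmsys/vicuna-7b-v1.5",  # illustrative; any Hugging Face causal LM
    model_kwargs={
        "temperature": 0,
        "max_length": 64,
        "trust_remote_code": True,
        "device": "xpu",  # new in this PR; "cpu" remains the default
    },
)
print(llm.invoke("What is IPEX-LLM?"))
```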

---------

Co-authored-by: ivy-lv11 <zhicunlv@gmail.com>
Commit: 566e9ba164 (parent: d19e074374)
Author: Yuwen Hu
Date: 2024-09-02 20:49:08 +08:00
Committer: GitHub
4 changed files with 311 additions and 34 deletions

File: libs/community/langchain_community/llms/ipex_llm.py

@@ -139,6 +139,16 @@ class IpexLLM(LLM):
         kwargs = kwargs or {}
         _tokenizer_id = tokenizer_id or model_id
+        # Set "cpu" as the default device
+        if "device" not in _model_kwargs:
+            _model_kwargs["device"] = "cpu"
+
+        if _model_kwargs["device"] not in ["cpu", "xpu"]:
+            raise ValueError(
+                "IpexLLM currently only supports device to be "
+                f"'cpu' or 'xpu', but you have: {_model_kwargs['device']}."
+            )
+        device = _model_kwargs.pop("device")
         try:
             tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
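A note on the hunk above: the validation runs before the tokenizer or model is
loaded, so an unsupported device fails fast, and `device` is popped from
`_model_kwargs` because the Hugging Face `from_pretrained` calls do not accept
a `device` keyword. A hedged sketch of the expected failure mode (the model id
is illustrative):

```python
import pytest

from langchain_community.llms import IpexLLM

# Anything other than "cpu" or "xpu" should raise before any weights load.
with pytest.raises(ValueError, match="'cpu' or 'xpu'"):
    IpexLLM.from_model_id(
        model_id="lmsys/vicuna-7b-v1.5",  # illustrative
        model_kwargs={"device": "cuda"},
    )
```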
@@ -186,6 +196,8 @@ class IpexLLM(LLM):
             model_kwargs=_model_kwargs,
         )
+        model.to(device)
+
         return cls(
             model_id=model_id,
             model=model,
@@ -235,6 +247,7 @@ class IpexLLM(LLM):
             from transformers import TextStreamer

             input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
+            input_ids = input_ids.to(self.model.device)
             streamer = TextStreamer(
                 self.tokenizer, skip_prompt=True, skip_special_tokens=True
             )
@@ -261,6 +274,7 @@ class IpexLLM(LLM):
             return text
         else:
             input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
+            input_ids = input_ids.to(self.model.device)
             if stop is not None:
                 from transformers.generation.stopping_criteria import (
                     StoppingCriteriaList,
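The two `input_ids = input_ids.to(self.model.device)` additions follow the
standard PyTorch rule that a module's inputs must live on the same device as
its weights. A minimal CPU-only illustration of the pattern:

```python
import torch

layer = torch.nn.Linear(4, 4)  # weights sit on CPU here, on "xpu" after .to("xpu")
x = torch.randn(1, 4)          # newly created tensors default to CPU
x = x.to(next(layer.parameters()).device)  # mirrors input_ids.to(self.model.device)
y = layer(x)                   # a device mismatch here would raise a RuntimeError
```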

File: libs/community/tests/llms/test_ipex_llm.py

@@ -13,12 +13,18 @@ skip_if_no_model_ids = pytest.mark.skipif(
     not model_ids_to_test, reason="TEST_IPEXLLM_MODEL_IDS environment variable not set."
 )
 model_ids_to_test = [model_id.strip() for model_id in model_ids_to_test.split(",")]  # type: ignore
+device = os.getenv("TEST_IPEXLLM_MODEL_DEVICE") or "cpu"


 def load_model(model_id: str) -> Any:
     llm = IpexLLM.from_model_id(
         model_id=model_id,
-        model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True},
+        model_kwargs={
+            "temperature": 0,
+            "max_length": 16,
+            "trust_remote_code": True,
+            "device": device,
+        },
     )
     return llm
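With this change, the GPU path can be exercised by exporting
`TEST_IPEXLLM_MODEL_DEVICE=xpu` alongside `TEST_IPEXLLM_MODEL_IDS` before
running pytest. A sketch of a smoke test built on the helpers above (the test
name and prompt are illustrative, not part of this PR):

```python
@skip_if_no_model_ids
def test_generate_on_configured_device() -> None:  # hypothetical test name
    for model_id in model_ids_to_test:
        llm = load_model(model_id)
        output = llm.invoke("Hello!")
        assert isinstance(output, str)
```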