community: add Intel GPU support to ipex-llm llm integration (#22458)
**Description:** [IPEX-LLM](https://github.com/intel-analytics/ipex-llm) is a PyTorch library for running LLMs on Intel CPUs and GPUs (e.g., a local PC with an iGPU, or discrete GPUs such as Arc, Flex and Max) with very low latency. This PR adds Intel GPU support to the `ipex-llm` LLM integration.

**Dependencies:** `ipex-llm`

**Contribution maintainers:** @ivy-lv11 @Oscilloscope98

**Tests and docs:**
- Add: langchain/docs/docs/integrations/llms/ipex_llm_gpu.ipynb
- Update: langchain/docs/docs/integrations/llms/ipex_llm.ipynb
- Update: langchain/libs/community/tests/llms/test_ipex_llm.py

---------

Co-authored-by: ivy-lv11 <zhicunlv@gmail.com>
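For context, a minimal usage sketch of what this change enables (a sketch only: it assumes `ipex-llm` is installed and an Intel GPU is available, and the model id is a hypothetical example; the `"device"` key in `model_kwargs` is the new part):

```python
from langchain_community.llms import IpexLLM

# Load the model through ipex-llm and place it on an Intel GPU ("xpu").
# "lmsys/vicuna-7b-v1.5" is only an illustrative model id.
llm = IpexLLM.from_model_id(
    model_id="lmsys/vicuna-7b-v1.5",
    model_kwargs={"temperature": 0, "max_length": 64, "device": "xpu"},
)

print(llm.invoke("What is IPEX-LLM?"))
```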
@@ -139,6 +139,16 @@ class IpexLLM(LLM):
         kwargs = kwargs or {}

         _tokenizer_id = tokenizer_id or model_id
+        # Set "cpu" as default device
+        if "device" not in _model_kwargs:
+            _model_kwargs["device"] = "cpu"
+
+        if _model_kwargs["device"] not in ["cpu", "xpu"]:
+            raise ValueError(
+                "IpexLLM currently only supports device to be "
+                f"'cpu' or 'xpu', but you have: {_model_kwargs['device']}."
+            )
+        device = _model_kwargs.pop("device")

         try:
             tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
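The validation above is strict about device names: anything other than "cpu" or "xpu" is rejected before the model is loaded. A small sketch of the resulting behavior (hypothetical model id, assuming the check shown in this hunk):

```python
from langchain_community.llms import IpexLLM

try:
    # "cuda" is not a supported device for this integration, so this raises.
    IpexLLM.from_model_id(
        model_id="lmsys/vicuna-7b-v1.5",  # illustrative id only
        model_kwargs={"device": "cuda"},
    )
except ValueError as err:
    print(err)  # ...only supports device to be 'cpu' or 'xpu', but you have: cuda.
```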
@@ -186,6 +196,8 @@ class IpexLLM(LLM):
             model_kwargs=_model_kwargs,
         )

+        model.to(device)
+
         return cls(
             model_id=model_id,
             model=model,
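With `model.to(device)` in place, the loaded weights end up on the requested device before the instance is returned. A quick way to sanity-check placement (a sketch; it relies on the `model` field seen in the `cls(...)` call above and on the `llm` from the earlier example):

```python
# Standard PyTorch: every parameter reports the device it lives on.
param_device = next(llm.model.parameters()).device
print(param_device)  # expected "xpu:0" (or "cpu" for the default device)
```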
@@ -235,6 +247,7 @@ class IpexLLM(LLM):
             from transformers import TextStreamer

             input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
+            input_ids = input_ids.to(self.model.device)
             streamer = TextStreamer(
                 self.tokenizer, skip_prompt=True, skip_special_tokens=True
             )
@@ -261,6 +274,7 @@ class IpexLLM(LLM):
             return text
         else:
             input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
+            input_ids = input_ids.to(self.model.device)
             if stop is not None:
                 from transformers.generation.stopping_criteria import (
                     StoppingCriteriaList,
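Both the streaming and non-streaming branches now follow the standard PyTorch rule that input tensors must live on the same device as the model's weights; otherwise `generate()` fails with a device-mismatch error once the model sits on "xpu". A generic, IpexLLM-independent sketch of the pattern using plain transformers:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Small public model, used here only to illustrate device placement.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Tokenize on CPU, then move the tensor to wherever the model's weights live
# before calling generate(); mismatched devices raise a RuntimeError.
input_ids = tokenizer.encode("Hello", return_tensors="pt").to(model.device)
output = model.generate(input_ids, max_new_tokens=16)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```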
@@ -13,12 +13,18 @@ skip_if_no_model_ids = pytest.mark.skipif(
     not model_ids_to_test, reason="TEST_IPEXLLM_MODEL_IDS environment variable not set."
 )
 model_ids_to_test = [model_id.strip() for model_id in model_ids_to_test.split(",")]  # type: ignore
+device = os.getenv("TEST_IPEXLLM_MODEL_DEVICE") or "cpu"


 def load_model(model_id: str) -> Any:
     llm = IpexLLM.from_model_id(
         model_id=model_id,
-        model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True},
+        model_kwargs={
+            "temperature": 0,
+            "max_length": 16,
+            "trust_remote_code": True,
+            "device": device,
+        },
     )
     return llm
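To exercise the GPU path of this test, both environment variables read above must be set before pytest runs; a minimal sketch (the model id is a hypothetical example):

```python
import os

# TEST_IPEXLLM_MODEL_IDS takes a comma-separated list of model ids;
# TEST_IPEXLLM_MODEL_DEVICE falls back to "cpu" when unset.
os.environ["TEST_IPEXLLM_MODEL_IDS"] = "lmsys/vicuna-7b-v1.5"
os.environ["TEST_IPEXLLM_MODEL_DEVICE"] = "xpu"

# Then run, e.g.: pytest langchain/libs/community/tests/llms/test_ipex_llm.py
```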