Mirror of https://github.com/hwchase17/langchain.git, synced 2025-09-10 07:21:03 +00:00
community[patch]: add more data types support to ipex-llm llm integration (#20833)
- **Description**:
  - **Add support for more data types**: by default `IpexLLM` loads the model in int4 format. This PR adds support for more data types such as `sym_int5`, `sym_int8`, etc. Data formats like NF3, NF4, FP4 and FP8 are only supported on GPU and will be added in a future PR.
  - Fix a small issue in saving/loading, and update the API docs.
- **Dependencies**: `ipex-llm` library
- **Document**: In `docs/docs/integrations/llms/ipex_llm.ipynb`, added instructions for saving/loading the low-bit model.
- **Tests**: added new test cases to `libs/community/tests/integration_tests/llms/test_ipex_llm.py` and made the test model ids configurable.
- **Contribution maintainer**: @shane-huang
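For reference, a minimal usage sketch of the options this PR introduces, based only on the API surface shown in the diff below; the model id, save path, and generation kwargs are placeholders, and `ipex-llm` plus `transformers` must be installed:

```python
from langchain_community.llms import IpexLLM

# Load with an explicit low-bit format instead of the default int4 ("sym_int4");
# other CPU-supported values include "asym_int4", "sym_int5", "sym_int8".
llm = IpexLLM.from_model_id(
    model_id="lmsys/vicuna-7b-v1.5",  # placeholder HF repo id or local checkpoint
    load_in_low_bit="sym_int8",
    model_kwargs={"temperature": 0, "max_length": 64, "trust_remote_code": True},
)
print(llm.invoke("Hello!"))

# Save the converted low-bit weights once, then reload them later without
# repeating the conversion; tokenizer_id points back at the original model
# so the tokenizer can still be found.
saved_lowbit_path = "/tmp/saved_model"  # placeholder path
llm.model.save_low_bit(saved_lowbit_path)

llm_lowbit = IpexLLM.from_model_id_low_bit(
    model_id=saved_lowbit_path,
    tokenizer_id="lmsys/vicuna-7b-v1.5",
    model_kwargs={"temperature": 0, "max_length": 64, "trust_remote_code": True},
)
print(llm_lowbit.invoke("Hello!"))
```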
libs/community/langchain_community/llms/bigdl_llm.py
@@ -23,6 +23,10 @@ class BigdlLLM(IpexLLM):
         cls,
         model_id: str,
         model_kwargs: Optional[dict] = None,
+        *,
+        tokenizer_id: Optional[str] = None,
+        load_in_4bit: bool = True,
+        load_in_low_bit: Optional[str] = None,
         **kwargs: Any,
     ) -> LLM:
         """
@@ -31,6 +35,8 @@ class BigdlLLM(IpexLLM):
         Args:
             model_id: Path for the huggingface repo id to be downloaded or
                 the huggingface checkpoint folder.
+            tokenizer_id: Path for the huggingface repo id to be downloaded or
+                the huggingface checkpoint folder which contains the tokenizer.
             model_kwargs: Keyword arguments to pass to the model and tokenizer.
             kwargs: Extra arguments to pass to the model and tokenizer.

@@ -52,12 +58,27 @@ class BigdlLLM(IpexLLM):
                 "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
             )

+        if load_in_low_bit is not None:
+            logger.warning(
+                """`load_in_low_bit` option is not supported in BigdlLLM and
+                is ignored. For more data types support with `load_in_low_bit`,
+                use IpexLLM instead."""
+            )
+
+        if not load_in_4bit:
+            raise ValueError(
+                "BigdlLLM only supports loading in 4-bit mode, "
+                "i.e. load_in_4bit = True. "
+                "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
+            )
+
         _model_kwargs = model_kwargs or {}
+        _tokenizer_id = tokenizer_id or model_id

         try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
         except Exception:
-            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)

         try:
             model = AutoModelForCausalLM.from_pretrained(
@@ -86,6 +107,8 @@ class BigdlLLM(IpexLLM):
         cls,
         model_id: str,
         model_kwargs: Optional[dict] = None,
+        *,
+        tokenizer_id: Optional[str] = None,
         **kwargs: Any,
     ) -> LLM:
         """
@@ -94,6 +117,8 @@ class BigdlLLM(IpexLLM):
         Args:

             model_id: Path for the bigdl-llm transformers low-bit model folder.
+            tokenizer_id: Path for the huggingface repo id or local model folder
+                which contains the tokenizer.
             model_kwargs: Keyword arguments to pass to the model and tokenizer.
             kwargs: Extra arguments to pass to the model and tokenizer.

@@ -117,10 +142,12 @@ class BigdlLLM(IpexLLM):
             )

         _model_kwargs = model_kwargs or {}
+        _tokenizer_id = tokenizer_id or model_id
+
         try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
         except Exception:
-            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)

         try:
             model = AutoModelForCausalLM.load_low_bit(model_id, **_model_kwargs)
libs/community/langchain_community/llms/ipex_llm.py
@@ -42,6 +42,10 @@ class IpexLLM(LLM):
         cls,
         model_id: str,
         model_kwargs: Optional[dict] = None,
+        *,
+        tokenizer_id: Optional[str] = None,
+        load_in_4bit: bool = True,
+        load_in_low_bit: Optional[str] = None,
         **kwargs: Any,
     ) -> LLM:
         """
@@ -50,52 +54,29 @@ class IpexLLM(LLM):
         Args:
             model_id: Path for the huggingface repo id to be downloaded or
                 the huggingface checkpoint folder.
+            tokenizer_id: Path for the huggingface repo id to be downloaded or
+                the huggingface checkpoint folder which contains the tokenizer.
+            load_in_4bit: "Whether to load model in 4bit.
+                Unused if `load_in_low_bit` is not None.
+            load_in_low_bit: Which low bit precisions to use when loading model.
+                Example values: 'sym_int4', 'asym_int4', 'fp4', 'nf4', 'fp8', etc.
+                Overrides `load_in_4bit` if specified.
             model_kwargs: Keyword arguments to pass to the model and tokenizer.
             kwargs: Extra arguments to pass to the model and tokenizer.

         Returns:
             An object of IpexLLM.

         """
-        try:
-            from ipex_llm.transformers import (
-                AutoModel,
-                AutoModelForCausalLM,
-            )
-            from transformers import AutoTokenizer, LlamaTokenizer
-
-        except ImportError:
-            raise ValueError(
-                "Could not import ipex-llm or transformers. "
-                "Please install it with `pip install --pre --upgrade ipex-llm[all]`."
-            )
-
-        _model_kwargs = model_kwargs or {}
-
-        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
-        except Exception:
-            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
-
-        try:
-            model = AutoModelForCausalLM.from_pretrained(
-                model_id, load_in_4bit=True, **_model_kwargs
-            )
-        except Exception:
-            model = AutoModel.from_pretrained(
-                model_id, load_in_4bit=True, **_model_kwargs
-            )
-
-        if "trust_remote_code" in _model_kwargs:
-            _model_kwargs = {
-                k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
-            }
-
-        return cls(
+        return cls._load_model(
             model_id=model_id,
-            model=model,
-            tokenizer=tokenizer,
-            model_kwargs=_model_kwargs,
-            **kwargs,
+            tokenizer_id=tokenizer_id,
+            low_bit_model=False,
+            load_in_4bit=load_in_4bit,
+            load_in_low_bit=load_in_low_bit,
+            model_kwargs=model_kwargs,
+            kwargs=kwargs,
         )

     @classmethod
@@ -103,6 +84,8 @@ class IpexLLM(LLM):
         cls,
         model_id: str,
         model_kwargs: Optional[dict] = None,
+        *,
+        tokenizer_id: Optional[str] = None,
         **kwargs: Any,
     ) -> LLM:
         """
@@ -111,12 +94,36 @@ class IpexLLM(LLM):
         Args:

             model_id: Path for the ipex-llm transformers low-bit model folder.
+            tokenizer_id: Path for the huggingface repo id or local model folder
+                which contains the tokenizer.
             model_kwargs: Keyword arguments to pass to the model and tokenizer.
             kwargs: Extra arguments to pass to the model and tokenizer.

         Returns:
             An object of IpexLLM.
         """
+        return cls._load_model(
+            model_id=model_id,
+            tokenizer_id=tokenizer_id,
+            low_bit_model=True,
+            load_in_4bit=False,  # not used for low-bit model
+            load_in_low_bit=None,  # not used for low-bit model
+            model_kwargs=model_kwargs,
+            kwargs=kwargs,
+        )
+
+    @classmethod
+    def _load_model(
+        cls,
+        model_id: str,
+        tokenizer_id: Optional[str] = None,
+        load_in_4bit: bool = False,
+        load_in_low_bit: Optional[str] = None,
+        low_bit_model: bool = False,
+        model_kwargs: Optional[dict] = None,
+        kwargs: Optional[dict] = None,
+    ) -> Any:
         try:
             from ipex_llm.transformers import (
                 AutoModel,
@@ -126,26 +133,62 @@ class IpexLLM(LLM):

         except ImportError:
             raise ValueError(
-                "Could not import ipex-llm or transformers. "
-                "Please install it with `pip install --pre --upgrade ipex-llm[all]`."
+                "Could not import ipex-llm. "
+                "Please install `ipex-llm` properly following installation guides: "
+                "https://github.com/intel-analytics/ipex-llm?tab=readme-ov-file#install-ipex-llm."
             )

         _model_kwargs = model_kwargs or {}
-        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
-        except Exception:
-            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
+        kwargs = kwargs or {}
+
+        _tokenizer_id = tokenizer_id or model_id

         try:
-            model = AutoModelForCausalLM.load_low_bit(model_id, **_model_kwargs)
+            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
         except Exception:
-            model = AutoModel.load_low_bit(model_id, **_model_kwargs)
+            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)

+        # restore model_kwargs
         if "trust_remote_code" in _model_kwargs:
             _model_kwargs = {
                 k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
             }

+        # load model with AutoModelForCausalLM and falls back to AutoModel on failure.
+        load_kwargs = {
+            "use_cache": True,
+            "trust_remote_code": True,
+        }
+
+        if not low_bit_model:
+            if load_in_low_bit is not None:
+                load_function_name = "from_pretrained"
+                load_kwargs["load_in_low_bit"] = load_in_low_bit  # type: ignore
+            else:
+                load_function_name = "from_pretrained"
+                load_kwargs["load_in_4bit"] = load_in_4bit
+        else:
+            load_function_name = "load_low_bit"
+
+        try:
+            # Attempt to load with AutoModelForCausalLM
+            model = cls._load_model_general(
+                AutoModelForCausalLM,
+                load_function_name=load_function_name,
+                model_id=model_id,
+                load_kwargs=load_kwargs,
+                model_kwargs=_model_kwargs,
+            )
+        except Exception:
+            # Fallback to AutoModel if there's an exception
+            model = cls._load_model_general(
+                AutoModel,
+                load_function_name=load_function_name,
+                model_id=model_id,
+                load_kwargs=load_kwargs,
+                model_kwargs=_model_kwargs,
+            )

         return cls(
             model_id=model_id,
             model=model,
@@ -154,6 +197,24 @@ class IpexLLM(LLM):
             **kwargs,
         )

+    @staticmethod
+    def _load_model_general(
+        model_class: Any,
+        load_function_name: str,
+        model_id: str,
+        load_kwargs: dict,
+        model_kwargs: dict,
+    ) -> Any:
+        """General function to attempt to load a model."""
+        try:
+            load_function = getattr(model_class, load_function_name)
+            return load_function(model_id, **{**load_kwargs, **model_kwargs})
+        except Exception as e:
+            logger.error(
+                f"Failed to load model using "
+                f"{model_class.__name__}.{load_function_name}: {e}"
+            )
+
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
         """Get the identifying parameters."""
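The refactor above funnels every loading path through `_load_model_general`, which resolves the loader method by name (`from_pretrained` for fresh loads, `load_low_bit` for already-converted folders) and calls it with the shared `load_kwargs` merged into the user's `model_kwargs`. A small, library-independent sketch of that dispatch pattern; the `DummyModel` class is a hypothetical stand-in, not part of `ipex-llm`:

```python
from typing import Any, Optional


class DummyModel:
    """Hypothetical stand-in exposing the two loader entry points used above."""

    @classmethod
    def from_pretrained(cls, model_id: str, **kwargs: Any) -> str:
        return f"from_pretrained({model_id}, {sorted(kwargs)})"

    @classmethod
    def load_low_bit(cls, model_id: str, **kwargs: Any) -> str:
        return f"load_low_bit({model_id}, {sorted(kwargs)})"


def load_model_general(
    model_class: Any,
    load_function_name: str,
    model_id: str,
    load_kwargs: dict,
    model_kwargs: dict,
) -> Any:
    # Resolve the loader by name and call it with the merged keyword arguments,
    # mirroring _load_model_general above.
    load_function = getattr(model_class, load_function_name)
    return load_function(model_id, **{**load_kwargs, **model_kwargs})


def pick_loader(
    low_bit_model: bool, load_in_low_bit: Optional[str], load_in_4bit: bool
) -> tuple:
    # Mirrors the branching in _load_model: a saved low-bit folder uses
    # load_low_bit; otherwise from_pretrained with either an explicit low-bit
    # format or the plain 4-bit flag.
    load_kwargs: dict = {"use_cache": True, "trust_remote_code": True}
    if low_bit_model:
        return "load_low_bit", load_kwargs
    if load_in_low_bit is not None:
        load_kwargs["load_in_low_bit"] = load_in_low_bit
    else:
        load_kwargs["load_in_4bit"] = load_in_4bit
    return "from_pretrained", load_kwargs


name, kw = pick_loader(low_bit_model=False, load_in_low_bit="sym_int8", load_in_4bit=True)
print(load_model_general(DummyModel, name, "some/model", kw, {}))
```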
libs/community/tests/integration_tests/llms/test_bigdl_llm.py
@@ -1,23 +1,43 @@
 """Test BigdlLLM"""
+import os
+
+import pytest
 from langchain_core.outputs import LLMResult

 from langchain_community.llms.bigdl_llm import BigdlLLM

+model_ids_to_test = os.getenv("TEST_BIGDLLLM_MODEL_IDS") or ""
+skip_if_no_model_ids = pytest.mark.skipif(
+    not model_ids_to_test,
+    reason="TEST_BIGDLLLM_MODEL_IDS environment variable not set.",
+)
+model_ids_to_test = [model_id.strip() for model_id in model_ids_to_test.split(",")]  # type: ignore


-def test_call() -> None:
+@skip_if_no_model_ids
+@pytest.mark.parametrize(
+    "model_id",
+    model_ids_to_test,
+)
+def test_call(model_id: str) -> None:
     """Test valid call to bigdl-llm."""
     llm = BigdlLLM.from_model_id(
-        model_id="lmsys/vicuna-7b-v1.5",
+        model_id=model_id,
         model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True},
     )
     output = llm.invoke("Hello!")
     assert isinstance(output, str)


-def test_generate() -> None:
+@skip_if_no_model_ids
+@pytest.mark.parametrize(
+    "model_id",
+    model_ids_to_test,
+)
+def test_generate(model_id: str) -> None:
     """Test valid call to bigdl-llm."""
     llm = BigdlLLM.from_model_id(
-        model_id="lmsys/vicuna-7b-v1.5",
+        model_id=model_id,
         model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True},
     )
     output = llm.generate(["Hello!"])
libs/community/tests/integration_tests/llms/test_ipex_llm.py
@@ -1,25 +1,88 @@
 """Test IPEX LLM"""
+import os
+from typing import Any
+
+import pytest
 from langchain_core.outputs import LLMResult

-from langchain_community.llms.ipex_llm import IpexLLM
+from langchain_community.llms import IpexLLM

+model_ids_to_test = os.getenv("TEST_IPEXLLM_MODEL_IDS") or ""
+skip_if_no_model_ids = pytest.mark.skipif(
+    not model_ids_to_test, reason="TEST_IPEXLLM_MODEL_IDS environment variable not set."
+)
+model_ids_to_test = [model_id.strip() for model_id in model_ids_to_test.split(",")]  # type: ignore


-def test_call() -> None:
-    """Test valid call to ipex-llm."""
+def load_model(model_id: str) -> Any:
     llm = IpexLLM.from_model_id(
-        model_id="lmsys/vicuna-7b-v1.5",
+        model_id=model_id,
         model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True},
     )
+    return llm
+
+
+def load_model_more_types(model_id: str, load_in_low_bit: str) -> Any:
+    llm = IpexLLM.from_model_id(
+        model_id=model_id,
+        load_in_low_bit=load_in_low_bit,
+        model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True},
+    )
+    return llm
+
+
+@skip_if_no_model_ids
+@pytest.mark.parametrize(
+    "model_id",
+    model_ids_to_test,
+)
+def test_call(model_id: str) -> None:
+    """Test valid call."""
+    llm = load_model(model_id)
     output = llm.invoke("Hello!")
     assert isinstance(output, str)


-def test_generate() -> None:
-    """Test valid call to ipex-llm."""
-    llm = IpexLLM.from_model_id(
-        model_id="lmsys/vicuna-7b-v1.5",
-        model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True},
-    )
+@skip_if_no_model_ids
+@pytest.mark.parametrize(
+    "model_id",
+    model_ids_to_test,
+)
+def test_asym_int4(model_id: str) -> None:
+    """Test asym int4 data type."""
+    llm = load_model_more_types(model_id=model_id, load_in_low_bit="asym_int4")
+    output = llm.invoke("Hello!")
+    assert isinstance(output, str)
+
+
+@skip_if_no_model_ids
+@pytest.mark.parametrize(
+    "model_id",
+    model_ids_to_test,
+)
+def test_generate(model_id: str) -> None:
+    """Test valid generate."""
+    llm = load_model(model_id)
     output = llm.generate(["Hello!"])
     assert isinstance(output, LLMResult)
     assert isinstance(output.generations, list)
+
+
+@skip_if_no_model_ids
+@pytest.mark.parametrize(
+    "model_id",
+    model_ids_to_test,
+)
+def test_save_load_lowbit(model_id: str) -> None:
+    """Test save and load lowbit model."""
+    saved_lowbit_path = "/tmp/saved_model"
+    llm = load_model(model_id)
+    llm.model.save_low_bit(saved_lowbit_path)
+    del llm
+    loaded_llm = IpexLLM.from_model_id_low_bit(
+        model_id=saved_lowbit_path,
+        tokenizer_id=model_id,
+        model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True},
+    )
+    output = loaded_llm.invoke("Hello!")
+    assert isinstance(output, str)
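The new integration tests skip themselves unless the model-id environment variables are set. A hedged sketch of driving them from Python, assuming the repository layout above, a locally runnable model, and `ipex-llm`/`bigdl-llm` installed; the model id is a placeholder taken from the old hard-coded tests:

```python
import os

import pytest

# The parametrized tests above read these variables at import/collection time.
os.environ["TEST_IPEXLLM_MODEL_IDS"] = "lmsys/vicuna-7b-v1.5"  # placeholder model id
os.environ["TEST_BIGDLLLM_MODEL_IDS"] = "lmsys/vicuna-7b-v1.5"  # placeholder model id

pytest.main(
    [
        "libs/community/tests/integration_tests/llms/test_ipex_llm.py",
        "libs/community/tests/integration_tests/llms/test_bigdl_llm.py",
        "-v",
    ]
)
```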