community[patch]: add more data types support to ipex-llm llm integration (#20833)
- **Description**:
  - **Add support for more data types**: by default `IpexLLM` loads the model in int4 format. This PR adds support for more data types such as `sym_int5`, `sym_int8`, etc. Data formats like NF3, NF4, FP4 and FP8 are only supported on GPU and will be added in a future PR.
  - Fix a small issue in saving/loading; update the API docs.
- **Dependencies**: `ipex-llm` library.
- **Document**: added instructions for saving/loading the low-bit model to `docs/docs/integrations/llms/ipex_llm.ipynb`.
- **Tests**: added new test cases and config params to `libs/community/tests/integration_tests/llms/test_ipex_llm.py`.
- **Contribution maintainer**: @shane-huang
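As a quick illustration of the new option, a minimal sketch (the model id, generation kwargs, and the `sym_int5` choice are illustrative, not part of the commit; see the notebook above for the documented flow):

```python
from langchain_community.llms import IpexLLM

# Load with an explicit low-bit format instead of the default int4.
# CPU-side formats such as sym_int5 / sym_int8 are what this PR enables;
# NF3/NF4/FP4/FP8 are GPU-only and left for a future PR.
llm = IpexLLM.from_model_id(
    model_id="lmsys/vicuna-7b-v1.5",  # illustrative model
    load_in_low_bit="sym_int5",
    model_kwargs={"temperature": 0, "max_length": 64, "trust_remote_code": True},
)
print(llm.invoke("What is IPEX-LLM?"))
```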
@@ -23,6 +23,10 @@ class BigdlLLM(IpexLLM):
         cls,
         model_id: str,
         model_kwargs: Optional[dict] = None,
+        *,
+        tokenizer_id: Optional[str] = None,
+        load_in_4bit: bool = True,
+        load_in_low_bit: Optional[str] = None,
         **kwargs: Any,
     ) -> LLM:
         """
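The new `tokenizer_id` keyword lets the tokenizer come from a different repo or folder than the model weights; when omitted it falls back to `model_id` (see the `_tokenizer_id = tokenizer_id or model_id` line in the hunks below). A hedged sketch, with a hypothetical local tokenizer folder:

```python
from langchain_community.llms.bigdl import BigdlLLM

# Tokenizer resolved from its own folder; falls back to model_id if unset.
llm = BigdlLLM.from_model_id(
    model_id="lmsys/vicuna-7b-v1.5",            # illustrative model
    tokenizer_id="./vicuna-7b-v1.5-tokenizer",  # hypothetical local folder
    model_kwargs={"temperature": 0, "max_length": 64},
)
```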
@@ -31,6 +35,8 @@ class BigdlLLM(IpexLLM):
         Args:
             model_id: Path for the huggingface repo id to be downloaded or
                       the huggingface checkpoint folder.
+            tokenizer_id: Path for the huggingface repo id to be downloaded or
+                      the huggingface checkpoint folder which contains the tokenizer.
             model_kwargs: Keyword arguments to pass to the model and tokenizer.
             kwargs: Extra arguments to pass to the model and tokenizer.
 
@@ -52,12 +58,27 @@ class BigdlLLM(IpexLLM):
                 "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
             )
 
+        if load_in_low_bit is not None:
+            logger.warning(
+                """`load_in_low_bit` option is not supported in BigdlLLM and
+                is ignored. For more data types support with `load_in_low_bit`,
+                use IpexLLM instead."""
+            )
+
+        if not load_in_4bit:
+            raise ValueError(
+                "BigdlLLM only supports loading in 4-bit mode, "
+                "i.e. load_in_4bit = True. "
+                "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
+            )
+
         _model_kwargs = model_kwargs or {}
+        _tokenizer_id = tokenizer_id or model_id
 
         try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
         except Exception:
-            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
 
         try:
             model = AutoModelForCausalLM.from_pretrained(
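The two guards added above make BigdlLLM's behavior explicit; roughly (model id illustrative):

```python
# Accepted for signature compatibility with IpexLLM, but ignored:
# logs the warning above and still loads the model in int4.
llm = BigdlLLM.from_model_id(
    model_id="lmsys/vicuna-7b-v1.5",
    load_in_low_bit="sym_int5",
)

# Rejected outright: BigdlLLM only supports 4-bit loading.
llm = BigdlLLM.from_model_id(
    model_id="lmsys/vicuna-7b-v1.5",
    load_in_4bit=False,  # raises ValueError
)
```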
@@ -86,6 +107,8 @@ class BigdlLLM(IpexLLM):
         cls,
         model_id: str,
         model_kwargs: Optional[dict] = None,
+        *,
+        tokenizer_id: Optional[str] = None,
         **kwargs: Any,
     ) -> LLM:
         """
@@ -94,6 +117,8 @@ class BigdlLLM(IpexLLM):
         Args:
 
             model_id: Path for the bigdl-llm transformers low-bit model folder.
+            tokenizer_id: Path for the huggingface repo id or local model folder
+                      which contains the tokenizer.
             model_kwargs: Keyword arguments to pass to the model and tokenizer.
             kwargs: Extra arguments to pass to the model and tokenizer.
 
@@ -117,10 +142,12 @@ class BigdlLLM(IpexLLM):
             )
 
         _model_kwargs = model_kwargs or {}
+        _tokenizer_id = tokenizer_id or model_id
+
         try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
         except Exception:
-            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
 
         try:
             model = AutoModelForCausalLM.load_low_bit(model_id, **_model_kwargs)
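The `tokenizer_id` fallback fixed in this last hunk is what the documented save-then-reload flow relies on; a sketch following the notebook added in this PR (paths and model id illustrative):

```python
from langchain_community.llms import IpexLLM

llm = IpexLLM.from_model_id(
    model_id="lmsys/vicuna-7b-v1.5",
    model_kwargs={"temperature": 0, "max_length": 64, "trust_remote_code": True},
)

# Persist the converted low-bit weights; tokenizer files are not saved here,
# which is why from_model_id_low_bit takes a separate tokenizer_id.
saved_lowbit_path = "./vicuna-7b-v1.5-low-bit"  # illustrative path
llm.model.save_low_bit(saved_lowbit_path)

llm_lowbit = IpexLLM.from_model_id_low_bit(
    model_id=saved_lowbit_path,
    tokenizer_id="lmsys/vicuna-7b-v1.5",
    model_kwargs={"temperature": 0, "max_length": 64, "trust_remote_code": True},
)
```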