community[patch]: add more data types support to ipex-llm llm integration (#20833)

- **Description**: - **add support for more data types**: by default `IpexLLM` will load the model in int4 format. This PR adds support for more data types such as `sym_int5`, `sym_int8`, etc. Data formats like NF3, NF4, FP4 and FP8 are only supported on GPU and will be added in a future PR. - Fix a small issue in saving/loading, update API docs - **Dependencies**: `ipex-llm` library - **Document**: In `docs/docs/integrations/llms/ipex_llm.ipynb`, added instructions for saving/loading low-bit model. - **Tests**: added new test cases to `libs/community/tests/integration_tests/llms/test_ipex_llm.py`, added config params. - **Contribution maintainer**: @shane-huang
2025-09-10 23:41:28 +00:00 · 2024-04-26 03:58:18 +08:00
parent dc921f0823
commit fd1061e7bf
5 changed files with 343 additions and 85 deletions
--- a/libs/community/langchain_community/llms/bigdl_llm.py
+++ b/libs/community/langchain_community/llms/bigdl_llm.py
@@ -23,6 +23,10 @@ class BigdlLLM(IpexLLM):
        cls,
        model_id: str,
        model_kwargs: Optional[dict] = None,
+        *,
+        tokenizer_id: Optional[str] = None,
+        load_in_4bit: bool = True,
+        load_in_low_bit: Optional[str] = None,
        **kwargs: Any,
    ) -> LLM:
        """
@@ -31,6 +35,8 @@ class BigdlLLM(IpexLLM):
        Args:
            model_id: Path for the huggingface repo id to be downloaded or
                      the huggingface checkpoint folder.
+            tokenizer_id: Path for the huggingface repo id to be downloaded or
+                      the huggingface checkpoint folder which contains the tokenizer.
            model_kwargs: Keyword arguments to pass to the model and tokenizer.
            kwargs: Extra arguments to pass to the model and tokenizer.

@@ -52,12 +58,27 @@ class BigdlLLM(IpexLLM):
                "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
            )

+        if load_in_low_bit is not None:
+            logger.warning(
+                """`load_in_low_bit` option is not supported in BigdlLLM and 
+                is ignored. For more data types support with `load_in_low_bit`, 
+                use IpexLLM instead."""
+            )
+
+        if not load_in_4bit:
+            raise ValueError(
+                "BigdlLLM only supports loading in 4-bit mode, "
+                "i.e. load_in_4bit = True. "
+                "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
+            )
+
        _model_kwargs = model_kwargs or {}
+        _tokenizer_id = tokenizer_id or model_id

        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
        except Exception:
-            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)

        try:
            model = AutoModelForCausalLM.from_pretrained(
@@ -86,6 +107,8 @@ class BigdlLLM(IpexLLM):
        cls,
        model_id: str,
        model_kwargs: Optional[dict] = None,
+        *,
+        tokenizer_id: Optional[str] = None,
        **kwargs: Any,
    ) -> LLM:
        """
@@ -94,6 +117,8 @@ class BigdlLLM(IpexLLM):
        Args:

            model_id: Path for the bigdl-llm transformers low-bit model folder.
+            tokenizer_id: Path for the huggingface repo id or local model folder
+                      which contains the tokenizer.
            model_kwargs: Keyword arguments to pass to the model and tokenizer.
            kwargs: Extra arguments to pass to the model and tokenizer.

@@ -117,10 +142,12 @@ class BigdlLLM(IpexLLM):
            )

        _model_kwargs = model_kwargs or {}
+        _tokenizer_id = tokenizer_id or model_id
+
        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = AutoTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)
        except Exception:
-            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
+            tokenizer = LlamaTokenizer.from_pretrained(_tokenizer_id, **_model_kwargs)

        try:
            model = AutoModelForCausalLM.load_low_bit(model_id, **_model_kwargs)