From ae471a7dcbd4b485ca3c029324dcf1153a981f2a Mon Sep 17 00:00:00 2001
From: Shengsheng Huang
Date: Sat, 2 Mar 2024 02:04:53 +0800
Subject: [PATCH] community[minor]: add BigDL-LLM integrations (#17953)

- **Description**: [`bigdl-llm`](https://github.com/intel-analytics/BigDL) is a library for running LLMs on Intel XPU (from laptop to GPU to cloud) using INT4/FP4/INT8/FP8 with very low latency (for any PyTorch model). This PR adds bigdl-llm integrations to LangChain.
- **Issue**: NA
- **Dependencies**: `bigdl-llm` library
- **Contribution maintainer**: @shane-huang

Examples added:
- docs/docs/integrations/llms/bigdl.ipynb
---
 docs/docs/integrations/llms/bigdl.ipynb    | 182 ++++++++++++++
 .../langchain_community/llms/bigdl.py      | 222 ++++++++++++++++++
 .../integration_tests/llms/test_bigdl.py   |  25 ++
 3 files changed, 429 insertions(+)
 create mode 100644 docs/docs/integrations/llms/bigdl.ipynb
 create mode 100644 libs/community/langchain_community/llms/bigdl.py
 create mode 100644 libs/community/tests/integration_tests/llms/test_bigdl.py

diff --git a/docs/docs/integrations/llms/bigdl.ipynb b/docs/docs/integrations/llms/bigdl.ipynb
new file mode 100644
index 00000000000..60684898d7e
--- /dev/null
+++ b/docs/docs/integrations/llms/bigdl.ipynb
@@ -0,0 +1,182 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# BigDL-LLM\n",
+    "\n",
+    "> [BigDL-LLM](https://github.com/intel-analytics/BigDL/) is a low-bit LLM optimization library on Intel XPU (Xeon/Core/Flex/Arc/Max). It can make LLMs run extremely fast and consume much less memory on Intel platforms. It is open sourced under Apache 2.0 License.\n",
+    "\n",
+    "This example goes over how to use LangChain to interact with BigDL-LLM for text generation.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Update LangChain\n",
+    "\n",
+    "%pip install -qU langchain langchain-community"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install BigDL-LLM for running LLMs locally on Intel CPU."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install BigDL-LLM\n",
+    "%pip install --pre --upgrade bigdl-llm[all]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Usage"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chains import LLMChain\n",
+    "from langchain_community.llms.bigdl import BigdlLLM\n",
+    "from langchain_core.prompts import PromptTemplate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "template = \"USER: {question}\\nASSISTANT:\"\n",
+    "prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Load Model:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "69e018750ffb4de1af22ce49cd6957f4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-02-23 17:44:12,156 - INFO - Converting the current model to sym_int4 format......\n",
+      "2024-02-23 17:44:12,156 - INFO - Converting the current model to sym_int4 format......\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain_community.llms.bigdl import BigdlLLM\n",
+    "\n",
+    "llm = BigdlLLM.from_model_id(\n",
+    "    model_id=\"lmsys/vicuna-7b-v1.5\",\n",
+    "    model_kwargs={\"temperature\": 0, \"max_length\": 64, \"trust_remote_code\": True},\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Use it in Chains:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AI stands for \"Artificial Intelligence.\" It refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. AI systems use algorithms and data to learn and improve their performance over time, and they can be applied in a wide range of fields, including healthcare, finance, transportation, and entertainment.\n"
+     ]
+    }
+   ],
+   "source": [
+    "llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
+    "\n",
+    "question = \"What is AI?\"\n",
+    "output = llm_chain.run(question)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "3f7e6312f61e27de3268c52a2a2c9de257d3c2c3a52e4989858b576ac0c25d61"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/libs/community/langchain_community/llms/bigdl.py b/libs/community/langchain_community/llms/bigdl.py
new file mode 100644
index 00000000000..9e876d25011
--- /dev/null
+++ b/libs/community/langchain_community/llms/bigdl.py
@@ -0,0 +1,222 @@
+import logging
+from typing import Any, List, Mapping, Optional
+
+from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.language_models.llms import LLM
+from langchain_core.pydantic_v1 import Extra
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_MODEL_ID = "THUDM/chatglm-6b"  # default model id, matching the docstring example
+
+
+class BigdlLLM(LLM):
+    """Wrapper around the BigDL-LLM model.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.llms.bigdl import BigdlLLM
+            llm = BigdlLLM.from_model_id(model_id="THUDM/chatglm-6b")
+    """
+
+    model_id: str = DEFAULT_MODEL_ID
+    """Model name or model path to use."""
+    model_kwargs: Optional[dict] = None
+    """Keyword arguments passed to the model."""
+    model: Any  #: :meta private:
+    """BigDL-LLM Transformers-INT4 model."""
+    tokenizer: Any  #: :meta private:
+    """Huggingface tokenizer model."""
+    streaming: bool = True
+    """Whether to stream the results, token by token."""
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @classmethod
+    def from_model_id(
+        cls,
+        model_id: str,
+        model_kwargs: Optional[dict] = None,
+        **kwargs: Any,
+    ) -> LLM:
+        """
+        Construct an object from model_id.
+
+        Args:
+            model_id: Path for the huggingface repo id to be downloaded or
+                the huggingface checkpoint folder.
+            model_kwargs: Keyword arguments to pass to the model and tokenizer.
+            kwargs: Extra arguments to pass to the model and tokenizer.
+
+        Returns:
+            An object of BigdlLLM.
+        """
+        try:
+            from bigdl.llm.transformers import (
+                AutoModel,
+                AutoModelForCausalLM,
+            )
+            from transformers import AutoTokenizer, LlamaTokenizer
+
+        except ImportError:
+            raise ValueError(
+                "Could not import bigdl-llm or transformers. "
+                "Please install it with `pip install --pre --upgrade bigdl-llm[all]`."
+            )
+
+        _model_kwargs = model_kwargs or {}
+
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
+        except Exception:
+            tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs)
+
+        try:
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id, load_in_4bit=True, **_model_kwargs
+            )
+        except Exception:
+            model = AutoModel.from_pretrained(
+                model_id, load_in_4bit=True, **_model_kwargs
+            )
+
+        if "trust_remote_code" in _model_kwargs:
+            _model_kwargs = {
+                k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
+            }
+
+        return cls(
+            model_id=model_id,
+            model=model,
+            tokenizer=tokenizer,
+            model_kwargs=_model_kwargs,
+            **kwargs,
+        )
+
+    @classmethod
+    def from_model_id_low_bit(
+        cls,
+        model_id: str,
+        model_kwargs: Optional[dict] = None,
+        **kwargs: Any,
+    ) -> LLM:
+        """
+        Construct a low-bit object from model_id.
+
+        Args:
+            model_id: Path for the bigdl transformers low-bit model checkpoint folder.
+            model_kwargs: Keyword arguments to pass to the model and tokenizer.
+            kwargs: Extra arguments to pass to the model and tokenizer.
+
+        Returns:
+            An object of BigdlLLM.
+        """
+        try:
+            from bigdl.llm.transformers import (
+                AutoModel,
+                AutoModelForCausalLM,
+            )
+            from transformers import AutoTokenizer, LlamaTokenizer
+
+        except ImportError:
+            raise ValueError(
+                "Could not import bigdl-llm or transformers. "
" + "Please install it with `pip install --pre --upgrade bigdl-llm[all]`" + ) + + _model_kwargs = model_kwargs or {} + try: + tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs) + except Exception: + tokenizer = LlamaTokenizer.from_pretrained(model_id, **_model_kwargs) + + try: + model = AutoModelForCausalLM.load_low_bit(model_id, **_model_kwargs) + except Exception: + model = AutoModel.load_low_bit(model_id, **_model_kwargs) + + if "trust_remote_code" in _model_kwargs: + _model_kwargs = { + k: v for k, v in _model_kwargs.items() if k != "trust_remote_code" + } + + return cls( + model_id=model_id, + model=model, + tokenizer=tokenizer, + model_kwargs=_model_kwargs, + **kwargs, + ) + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + "model_id": self.model_id, + "model_kwargs": self.model_kwargs, + } + + @property + def _llm_type(self) -> str: + return "BigDL-llm" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + if self.streaming: + from transformers import TextStreamer + + input_ids = self.tokenizer.encode(prompt, return_tensors="pt") + streamer = TextStreamer( + self.tokenizer, skip_prompt=True, skip_special_tokens=True + ) + if stop is not None: + from transformers.generation.stopping_criteria import ( + StoppingCriteriaList, + ) + from transformers.tools.agents import StopSequenceCriteria + + # stop generation when stop words are encountered + # TODO: stop generation when the following one is stop word + stopping_criteria = StoppingCriteriaList( + [StopSequenceCriteria(stop, self.tokenizer)] + ) + else: + stopping_criteria = None + output = self.model.generate( + input_ids, + streamer=streamer, + stopping_criteria=stopping_criteria, + **kwargs, + ) + text = self.tokenizer.decode(output[0], skip_special_tokens=True) + return text + else: + input_ids = self.tokenizer.encode(prompt, return_tensors="pt") + if stop is not None: + from transformers.generation.stopping_criteria import ( + StoppingCriteriaList, + ) + from transformers.tools.agents import StopSequenceCriteria + + stopping_criteria = StoppingCriteriaList( + [StopSequenceCriteria(stop, self.tokenizer)] + ) + else: + stopping_criteria = None + output = self.model.generate( + input_ids, stopping_criteria=stopping_criteria, **kwargs + ) + text = self.tokenizer.decode(output[0], skip_special_tokens=True)[ + len(prompt) : + ] + return text diff --git a/libs/community/tests/integration_tests/llms/test_bigdl.py b/libs/community/tests/integration_tests/llms/test_bigdl.py new file mode 100644 index 00000000000..905a373c483 --- /dev/null +++ b/libs/community/tests/integration_tests/llms/test_bigdl.py @@ -0,0 +1,25 @@ +"""Test BigDL LLM""" +from langchain_core.outputs import LLMResult + +from langchain_community.llms.bigdl import BigdlLLM + + +def test_call() -> None: + """Test valid call to baichuan.""" + llm = BigdlLLM.from_model_id( + model_id="lmsys/vicuna-7b-v1.5", + model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True}, + ) + output = llm("Hello!") + assert isinstance(output, str) + + +def test_generate() -> None: + """Test valid call to baichuan.""" + llm = BigdlLLM.from_model_id( + model_id="lmsys/vicuna-7b-v1.5", + model_kwargs={"temperature": 0, "max_length": 16, "trust_remote_code": True}, + ) + output = llm.generate(["Hello!"]) + assert isinstance(output, LLMResult) + assert isinstance(output.generations, list)