Adds DeepSparse as an LLM (#9184)

Adds [DeepSparse](https://github.com/neuralmagic/deepsparse) as an LLM
backend. DeepSparse supports running various open-source sparsified
models hosted on [SparseZoo](https://sparsezoo.neuralmagic.com/) for
performance gains on CPUs.

Twitter handles: @mgoin_ @neuralmagic


---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Michael Goin
2023-08-14 01:35:58 -04:00
committed by GitHub
parent 0fa69d8988
commit 621da3c164
5 changed files with 220 additions and 0 deletions

View File

@@ -38,6 +38,7 @@ from langchain.llms.cohere import Cohere
from langchain.llms.ctransformers import CTransformers
from langchain.llms.databricks import Databricks
from langchain.llms.deepinfra import DeepInfra
from langchain.llms.deepsparse import DeepSparse
from langchain.llms.edenai import EdenAI
from langchain.llms.fake import FakeListLLM
from langchain.llms.fireworks import Fireworks, FireworksChat
@@ -103,6 +104,7 @@ __all__ = [
"Cohere",
"Databricks",
"DeepInfra",
"DeepSparse",
"EdenAI",
"FakeListLLM",
"Fireworks",
@@ -172,6 +174,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
"ctransformers": CTransformers,
"databricks": Databricks,
"deepinfra": DeepInfra,
"deepsparse": DeepSparse,
"edenai": EdenAI,
"fake-list": FakeListLLM,
"forefrontai": ForefrontAI,

View File

@@ -0,0 +1,87 @@
# flake8: noqa
from typing import Any, Dict, Optional, List
from pydantic import root_validator
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
class DeepSparse(LLM):
    """Neural Magic DeepSparse LLM interface.

    To use, you should have the ``deepsparse`` or ``deepsparse-nightly``
    python package installed. See https://github.com/neuralmagic/deepsparse

    This interface lets you deploy optimized LLMs straight from the
    [SparseZoo](https://sparsezoo.neuralmagic.com/?useCase=text_generation)

    Example:
        .. code-block:: python

            from langchain.llms import DeepSparse
            llm = DeepSparse(model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none")
    """  # noqa: E501

    pipeline: Any  #: :meta private:

    model: str
    """The path to a model file or directory or the name of a SparseZoo model stub."""

    config: Optional[Dict[str, Any]] = None
    """Key word arguments passed to the pipeline."""

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters."""
        return {
            "model": self.model,
            "config": self.config,
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "deepsparse"

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the ``deepsparse`` package is installed and
        construct the text-generation pipeline.

        Raises:
            ImportError: If ``deepsparse`` is not installed.
        """
        try:
            from deepsparse import Pipeline
        except ImportError:
            raise ImportError(
                "Could not import `deepsparse` package. "
                "Please install it with `pip install deepsparse`"
            )

        # ``config`` may be None (the field default); normalize to an empty
        # dict before splatting into the pipeline constructor.
        config = values["config"] or {}

        values["pipeline"] = Pipeline.create(
            task="text_generation",
            model_path=values["model"],
            **config,
        )

        return values

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text from a prompt.

        Args:
            prompt: The prompt to generate text from.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                response = llm("Tell me a joke.")
        """
        # ``sequences[0]`` is already a str; no re-joining of its characters
        # is needed (the original ``"".join(text)`` was an identity no-op).
        text = self.pipeline(sequences=prompt).sequences[0]

        if stop is not None:
            text = enforce_stop_tokens(text, stop)

        return text

View File

@@ -0,0 +1,17 @@
"""Test DeepSparse wrapper."""
from langchain.llms import DeepSparse
def test_deepsparse_call() -> None:
    """Test valid call to DeepSparse."""
    # Keep generation short so the test runs quickly; also disable the
    # deepsparse cache (pipeline kwarg `use_deepsparse_cache`).
    config = {"max_generated_tokens": 5, "use_deepsparse_cache": False}
    llm = DeepSparse(
        model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none",
        config=config,
    )
    output = llm("def ")
    assert isinstance(output, str)
    assert len(output) > 1
    # NOTE(review): asserting exact generated text is brittle — any change to
    # the model weights or decoding settings will break it; consider relying
    # on the type/length checks above instead.
    assert output == "ids_to_names"