Adds DeepSparse as an LLM (#9184)

Adds [DeepSparse](https://github.com/neuralmagic/deepsparse) as an LLM
backend. DeepSparse supports running various open-source sparsified
models hosted on [SparseZoo](https://sparsezoo.neuralmagic.com/) for
performance gains on CPUs.

Twitter handles: @mgoin_ @neuralmagic


---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Michael Goin
2023-08-14 01:35:58 -04:00
committed by GitHub
parent 0fa69d8988
commit 621da3c164
5 changed files with 220 additions and 0 deletions

View File

@@ -38,6 +38,7 @@ from langchain.llms.cohere import Cohere
from langchain.llms.ctransformers import CTransformers
from langchain.llms.databricks import Databricks
from langchain.llms.deepinfra import DeepInfra
from langchain.llms.deepsparse import DeepSparse
from langchain.llms.edenai import EdenAI
from langchain.llms.fake import FakeListLLM
from langchain.llms.fireworks import Fireworks, FireworksChat
@@ -103,6 +104,7 @@ __all__ = [
"Cohere",
"Databricks",
"DeepInfra",
"DeepSparse",
"EdenAI",
"FakeListLLM",
"Fireworks",
@@ -172,6 +174,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
"ctransformers": CTransformers,
"databricks": Databricks,
"deepinfra": DeepInfra,
"deepsparse": DeepSparse,
"edenai": EdenAI,
"fake-list": FakeListLLM,
"forefrontai": ForefrontAI,

View File

@@ -0,0 +1,87 @@
# flake8: noqa
from typing import Any, Dict, Optional, List
from pydantic import root_validator
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
class DeepSparse(LLM):
    """Neural Magic DeepSparse LLM interface.

    To use, you should have the ``deepsparse`` or ``deepsparse-nightly``
    python package installed. See https://github.com/neuralmagic/deepsparse

    This interface lets you deploy optimized LLMs straight from the
    [SparseZoo](https://sparsezoo.neuralmagic.com/?useCase=text_generation)

    Example:
        .. code-block:: python

            from langchain.llms import DeepSparse
            llm = DeepSparse(model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none")
    """  # noqa: E501

    pipeline: Any  #: :meta private:

    model: str
    """The path to a model file or directory or the name of a SparseZoo model stub."""

    config: Optional[Dict[str, Any]] = None
    """Key word arguments passed to the pipeline."""

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters."""
        return {
            "model": self.model,
            "config": self.config,
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "deepsparse"

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the ``deepsparse`` package is installed and
        construct the text-generation pipeline.

        Raises:
            ImportError: If ``deepsparse`` is not installed.
        """
        try:
            from deepsparse import Pipeline
        except ImportError:
            raise ImportError(
                "Could not import `deepsparse` package. "
                "Please install it with `pip install deepsparse`"
            )

        # ``config`` may be None (the field default); normalize to an empty
        # dict before splatting into the pipeline constructor.
        config = values["config"] or {}

        values["pipeline"] = Pipeline.create(
            task="text_generation",
            model_path=values["model"],
            **config,
        )

        return values

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text from a prompt.

        Args:
            prompt: The prompt to generate text from.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                response = llm("Tell me a joke.")
        """
        # ``sequences[0]`` is already a str; no re-joining of its characters
        # is needed (the original ``"".join(text)`` was an identity no-op).
        text = self.pipeline(sequences=prompt).sequences[0]

        if stop is not None:
            text = enforce_stop_tokens(text, stop)

        return text

View File

@@ -0,0 +1,17 @@
"""Test DeepSparse wrapper."""
from langchain.llms import DeepSparse
def test_deepsparse_call() -> None:
    """Test valid call to DeepSparse."""
    # Keep generation short so the test runs quickly; also disable the
    # deepsparse cache (pipeline kwarg `use_deepsparse_cache`).
    config = {"max_generated_tokens": 5, "use_deepsparse_cache": False}
    llm = DeepSparse(
        model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none",
        config=config,
    )
    output = llm("def ")
    assert isinstance(output, str)
    assert len(output) > 1
    # NOTE(review): asserting exact generated text is brittle — any change to
    # the model weights or decoding settings will break it; consider relying
    # on the type/length checks above instead.
    assert output == "ids_to_names"