community[minor]: Add openvino backend support (#11591)

- **Description:** Add OpenVINO backend support via Hugging Face Optimum Intel.
- **Dependencies:** `optimum[openvino]`
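
A minimal usage sketch of the new backend, mirroring the integration tests added in this PR (the import path assumes the `langchain_community` package this PR targets; the model ID and generation settings are illustrative):

```python
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# backend="openvino" switches model loading to Optimum Intel's OV* classes;
# the OpenVINO device is passed through model_kwargs, not the device argument.
llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2",
    task="text-generation",
    backend="openvino",
    model_kwargs={"device": "CPU"},
    pipeline_kwargs={"max_new_tokens": 64},
)
print(llm("Say foo:"))
```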

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Author: Ethan Yang
Date: 2024-03-01 10:04:24 -08:00
Committed by: GitHub
Parent: a89f007947
Commit: f61cb8d407
4 changed files with 212 additions and 7 deletions


@@ -68,6 +68,7 @@ class HuggingFacePipeline(BaseLLM):
         cls,
         model_id: str,
         task: str,
+        backend: str = "default",
         device: Optional[int] = -1,
         device_map: Optional[str] = None,
         model_kwargs: Optional[dict] = None,
@@ -95,9 +96,57 @@ class HuggingFacePipeline(BaseLLM):
         try:
             if task == "text-generation":
-                model = AutoModelForCausalLM.from_pretrained(model_id, **_model_kwargs)
+                if backend == "openvino":
+                    try:
+                        from optimum.intel.openvino import OVModelForCausalLM
+                    except ImportError:
+                        raise ValueError(
+                            "Could not import optimum-intel python package. "
+                            "Please install it with: "
+                            "pip install 'optimum[openvino,nncf]'"
+                        )
+                    try:
+                        # load an already exported OpenVINO model (local path or hub)
+                        model = OVModelForCausalLM.from_pretrained(
+                            model_id, **_model_kwargs
+                        )
+                    except Exception:
+                        # fall back to exporting the Transformers checkpoint on load
+                        model = OVModelForCausalLM.from_pretrained(
+                            model_id, export=True, **_model_kwargs
+                        )
+                else:
+                    model = AutoModelForCausalLM.from_pretrained(
+                        model_id, **_model_kwargs
+                    )
             elif task in ("text2text-generation", "summarization"):
-                model = AutoModelForSeq2SeqLM.from_pretrained(model_id, **_model_kwargs)
+                if backend == "openvino":
+                    try:
+                        from optimum.intel.openvino import OVModelForSeq2SeqLM
+                    except ImportError:
+                        raise ValueError(
+                            "Could not import optimum-intel python package. "
+                            "Please install it with: "
+                            "pip install 'optimum[openvino,nncf]'"
+                        )
+                    try:
+                        # load an already exported OpenVINO model (local path or hub)
+                        model = OVModelForSeq2SeqLM.from_pretrained(
+                            model_id, **_model_kwargs
+                        )
+                    except Exception:
+                        # fall back to exporting the Transformers checkpoint on load
+                        model = OVModelForSeq2SeqLM.from_pretrained(
+                            model_id, export=True, **_model_kwargs
+                        )
+                else:
+                    model = AutoModelForSeq2SeqLM.from_pretrained(
+                        model_id, **_model_kwargs
+                    )
             else:
                 raise ValueError(
                     f"Got invalid task {task}, "
@@ -112,9 +161,13 @@ class HuggingFacePipeline(BaseLLM):
             tokenizer.pad_token_id = model.config.eos_token_id
         if (
-            getattr(model, "is_loaded_in_4bit", False)
-            or getattr(model, "is_loaded_in_8bit", False)
-        ) and device is not None:
+            (
+                getattr(model, "is_loaded_in_4bit", False)
+                or getattr(model, "is_loaded_in_8bit", False)
+            )
+            and device is not None
+            and backend == "default"
+        ):
             logger.warning(
                 f"Setting the `device` argument to None from {device} to avoid "
                 "the error caused by attempting to move the model that was already "
@@ -123,7 +176,11 @@ class HuggingFacePipeline(BaseLLM):
             )
             device = None
-        if device is not None and importlib.util.find_spec("torch") is not None:
+        if (
+            device is not None
+            and importlib.util.find_spec("torch") is not None
+            and backend == "default"
+        ):
             import torch

             cuda_device_count = torch.cuda.device_count()
@@ -142,6 +199,8 @@ class HuggingFacePipeline(BaseLLM):
                     "can be a positive integer associated with CUDA device id.",
                     cuda_device_count,
                 )
+        if device is not None and device_map is not None and backend == "openvino":
+            logger.warning("Please set device for OpenVINO through: 'model_kwargs'")
         if "trust_remote_code" in _model_kwargs:
             _model_kwargs = {
                 k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
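
The guards above make the CUDA-oriented `device` handling a no-op for the OpenVINO backend and warn users to pass the device through `model_kwargs` instead. A hedged sketch of GPU placement under that convention ("GPU" follows OpenVINO device naming and is an assumption here; only "CPU" is exercised by this PR's tests):

```python
# Sketch: OpenVINO device selection travels in model_kwargs,
# not in the device/device_map arguments.
llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2",
    task="text-generation",
    backend="openvino",
    model_kwargs={"device": "GPU"},  # assumption: an OpenVINO-visible GPU exists
)
```

The integration tests added in the second changed file below follow this convention with `"device": "CPU"`.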


@@ -80,3 +80,45 @@ def test_huggingface_pipeline_runtime_kwargs() -> None:
     prompt = "Say foo:"
     output = llm(prompt, pipeline_kwargs={"max_new_tokens": 2})
     assert len(output) < 10
+
+
+ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
+
+
+def test_huggingface_pipeline_text_generation_ov() -> None:
+    """Test valid call to HuggingFace text generation model with openvino."""
+    llm = HuggingFacePipeline.from_model_id(
+        model_id="gpt2",
+        task="text-generation",
+        backend="openvino",
+        model_kwargs={"device": "CPU", "ov_config": ov_config},
+        pipeline_kwargs={"max_new_tokens": 64},
+    )
+    output = llm("Say foo:")
+    assert isinstance(output, str)
+
+
+def test_huggingface_pipeline_text2text_generation_ov() -> None:
+    """Test valid call to HuggingFace text2text generation model with openvino."""
+    llm = HuggingFacePipeline.from_model_id(
+        model_id="google/flan-t5-small",
+        task="text2text-generation",
+        backend="openvino",
+        model_kwargs={"device": "CPU", "ov_config": ov_config},
+        pipeline_kwargs={"max_new_tokens": 64},
+    )
+    output = llm("Say foo:")
+    assert isinstance(output, str)
+
+
+def test_huggingface_pipeline_summarization_ov() -> None:
+    """Test valid call to HuggingFace summarization model with openvino."""
+    llm = HuggingFacePipeline.from_model_id(
+        model_id="facebook/bart-large-cnn",
+        task="summarization",
+        backend="openvino",
+        model_kwargs={"device": "CPU", "ov_config": ov_config},
+        pipeline_kwargs={"max_new_tokens": 64},
+    )
+    output = llm("Say foo:")
+    assert isinstance(output, str)
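
For reference, the shared `ov_config` above maps to OpenVINO runtime properties: a latency-oriented performance hint, a single inference stream, and model caching left disabled via an empty cache directory. A hedged sketch of an alternative configuration (the property names are OpenVINO's; these particular values are assumptions and are not exercised by this PR):

```python
# Sketch: throughput-oriented OpenVINO settings with on-disk model caching.
ov_config = {
    "PERFORMANCE_HINT": "THROUGHPUT",  # optimize for aggregate throughput
    "CACHE_DIR": "./ov_cache",  # cache compiled models between runs
}
```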