community[minor]: Allow passing allow_dangerous_deserialization when loading LLM chain (#18894)

### Issue Recently, the new `allow_dangerous_deserialization` flag was introduced to prevent unsafe model deserialization that relies on pickle without the user's knowledge (#18696). Since then, some LLMs such as Databricks require this flag to be passed as `True` in order to instantiate the model. However, this breaks the existing functionality of loading such LLMs within a chain using the `load_chain` method, because the underlying loader function [load_llm_from_config](f96dd57501/libs/langchain/langchain/chains/loading.py (L40)) (and `load_llm`) ignores the keyword arguments passed in. ### Solution This PR fixes the issue by propagating the `allow_dangerous_deserialization` argument to the class loader if and only if the LLM class has that field. --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
2025-09-16 06:53:16 +00:00 · 2024-03-27 00:07:55 +09:00
parent d7c14cb6f9
commit cfecbda48b
3 changed files with 130 additions and 64 deletions
--- a/libs/community/langchain_community/llms/loading.py
+++ b/libs/community/langchain_community/llms/loading.py
@@ -1,15 +1,17 @@
 """Base interface for loading large language model APIs."""
 import json
 from pathlib import Path
-from typing import Union
+from typing import Any, Union

 import yaml
 from langchain_core.language_models.llms import BaseLLM

 from langchain_community.llms import get_type_to_cls_dict

+_ALLOW_DANGEROUS_DESERIALIZATION_ARG = "allow_dangerous_deserialization"

-def load_llm_from_config(config: dict) -> BaseLLM:
+
+def load_llm_from_config(config: dict, **kwargs: Any) -> BaseLLM:
    """Load LLM from Config Dict."""
    if "_type" not in config:
        raise ValueError("Must specify an LLM Type in config")
@@ -21,11 +23,17 @@ def load_llm_from_config(config: dict) -> BaseLLM:
        raise ValueError(f"Loading {config_type} LLM not supported")

    llm_cls = type_to_cls_dict[config_type]()
-    return llm_cls(**config)
+
+    load_kwargs = {}
+    if _ALLOW_DANGEROUS_DESERIALIZATION_ARG in llm_cls.__fields__:
+        load_kwargs[_ALLOW_DANGEROUS_DESERIALIZATION_ARG] = kwargs.get(
+            _ALLOW_DANGEROUS_DESERIALIZATION_ARG, False
+        )
+
+    return llm_cls(**config, **load_kwargs)


-def load_llm(file: Union[str, Path]) -> BaseLLM:
-    """Load LLM from file."""
+def load_llm(file: Union[str, Path], **kwargs: Any) -> BaseLLM:
    # Convert file to Path object.
    if isinstance(file, str):
        file_path = Path(file)
@@ -41,4 +49,4 @@ def load_llm(file: Union[str, Path]) -> BaseLLM:
    else:
        raise ValueError("File type must be json or yaml")
    # Load the LLM from the config now.
-    return load_llm_from_config(config)
+    return load_llm_from_config(config, **kwargs)
--- a/libs/community/tests/unit_tests/llms/test_databricks.py
+++ b/libs/community/tests/unit_tests/llms/test_databricks.py
@@ -1,4 +1,5 @@
 """test Databricks LLM"""
+from pathlib import Path
 from typing import Any, Dict

 import pytest
@@ -8,6 +9,8 @@ from langchain_community.llms.databricks import (
    Databricks,
    _load_pickled_fn_from_hex_string,
 )
+from langchain_community.llms.loading import load_llm
+from tests.integration_tests.llms.utils import assert_llm_equality


 class MockDatabricksServingEndpointClient:
@@ -55,3 +58,26 @@ def test_serde_transform_input_fn(monkeypatch: MonkeyPatch) -> None:
    request = {"prompt": "What is the meaning of life?"}
    fn = _load_pickled_fn_from_hex_string(params["transform_input_fn"])
    assert fn(**request) == transform_input(**request)
+
+
+def test_saving_loading_llm(monkeypatch: MonkeyPatch, tmp_path: Path) -> None:
+    monkeypatch.setattr(
+        "langchain_community.llms.databricks._DatabricksServingEndpointClient",
+        MockDatabricksServingEndpointClient,
+    )
+    monkeypatch.setenv("DATABRICKS_HOST", "my-default-host")
+    monkeypatch.setenv("DATABRICKS_TOKEN", "my-default-token")
+
+    llm = Databricks(
+        endpoint_name="chat", temperature=0.1, allow_dangerous_deserialization=True
+    )
+    llm.save(file_path=tmp_path / "databricks.yaml")
+
+    # Loading without allow_dangerous_deserialization=True should raise an error.
+    with pytest.raises(ValueError, match="This code relies on the pickle module."):
+        load_llm(tmp_path / "databricks.yaml")
+
+    loaded_llm = load_llm(
+        tmp_path / "databricks.yaml", allow_dangerous_deserialization=True
+    )
+    assert_llm_equality(llm, loaded_llm)