community[minor]: Allow passing allow_dangerous_deserialization when loading LLM chain (#18894)

### Issue
Recently, the new `allow_dangerous_deserialization` flag was introduced
to prevent unsafe model deserialization that relies on pickle without
the user's knowledge (#18696). Since then, some LLMs such as Databricks
require this flag to be passed as `True` in order to instantiate the model.

However, this breaks the existing functionality of loading such LLMs within
a chain using the `load_chain` method, because the underlying loader
function
[load_llm_from_config](f96dd57501/libs/langchain/langchain/chains/loading.py (L40))
(and `load_llm`) ignores the keyword arguments passed in.

### Solution
This PR fixes this issue by propagating the
`allow_dangerous_deserialization` argument to the class loader iff the
LLM class has that field.

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
Yuki Watanabe
2024-03-27 00:07:55 +09:00
committed by GitHub
parent d7c14cb6f9
commit cfecbda48b
3 changed files with 130 additions and 64 deletions

View File

@@ -1,15 +1,17 @@
"""Base interface for loading large language model APIs."""
import json
from pathlib import Path
from typing import Union
from typing import Any, Union
import yaml
from langchain_core.language_models.llms import BaseLLM
from langchain_community.llms import get_type_to_cls_dict
_ALLOW_DANGEROUS_DESERIALIZATION_ARG = "allow_dangerous_deserialization"
def load_llm_from_config(config: dict) -> BaseLLM:
def load_llm_from_config(config: dict, **kwargs: Any) -> BaseLLM:
"""Load LLM from Config Dict."""
if "_type" not in config:
raise ValueError("Must specify an LLM Type in config")
@@ -21,11 +23,17 @@ def load_llm_from_config(config: dict) -> BaseLLM:
raise ValueError(f"Loading {config_type} LLM not supported")
llm_cls = type_to_cls_dict[config_type]()
return llm_cls(**config)
load_kwargs = {}
if _ALLOW_DANGEROUS_DESERIALIZATION_ARG in llm_cls.__fields__:
load_kwargs[_ALLOW_DANGEROUS_DESERIALIZATION_ARG] = kwargs.get(
_ALLOW_DANGEROUS_DESERIALIZATION_ARG, False
)
return llm_cls(**config, **load_kwargs)
def load_llm(file: Union[str, Path]) -> BaseLLM:
"""Load LLM from file."""
def load_llm(file: Union[str, Path], **kwargs: Any) -> BaseLLM:
# Convert file to Path object.
if isinstance(file, str):
file_path = Path(file)
@@ -41,4 +49,4 @@ def load_llm(file: Union[str, Path]) -> BaseLLM:
else:
raise ValueError("File type must be json or yaml")
# Load the LLM from the config now.
return load_llm_from_config(config)
return load_llm_from_config(config, **kwargs)

View File

@@ -1,4 +1,5 @@
"""test Databricks LLM"""
from pathlib import Path
from typing import Any, Dict
import pytest
@@ -8,6 +9,8 @@ from langchain_community.llms.databricks import (
Databricks,
_load_pickled_fn_from_hex_string,
)
from langchain_community.llms.loading import load_llm
from tests.integration_tests.llms.utils import assert_llm_equality
class MockDatabricksServingEndpointClient:
@@ -55,3 +58,26 @@ def test_serde_transform_input_fn(monkeypatch: MonkeyPatch) -> None:
request = {"prompt": "What is the meaning of life?"}
fn = _load_pickled_fn_from_hex_string(params["transform_input_fn"])
assert fn(**request) == transform_input(**request)
def test_saving_loading_llm(monkeypatch: MonkeyPatch, tmp_path: Path) -> None:
    """Round-trip a Databricks LLM through ``save`` and ``load_llm``.

    The LLM is saved to a YAML file under ``tmp_path``. Loading that file
    without ``allow_dangerous_deserialization=True`` is expected to raise a
    ``ValueError``; loading it with the flag set is expected to yield an LLM
    that ``assert_llm_equality`` accepts as equal to the original.
    """
    # Replace the serving-endpoint client with the mock class from this module.
    monkeypatch.setattr(
        "langchain_community.llms.databricks._DatabricksServingEndpointClient",
        MockDatabricksServingEndpointClient,
    )
    for env_name, env_value in (
        ("DATABRICKS_HOST", "my-default-host"),
        ("DATABRICKS_TOKEN", "my-default-token"),
    ):
        monkeypatch.setenv(env_name, env_value)

    saved_path = tmp_path / "databricks.yaml"
    original_llm = Databricks(
        endpoint_name="chat", temperature=0.1, allow_dangerous_deserialization=True
    )
    original_llm.save(file_path=saved_path)

    # Loading without allow_dangerous_deserialization=True should raise an error.
    with pytest.raises(ValueError, match="This code relies on the pickle module."):
        load_llm(saved_path)

    # With the flag passed explicitly, the load succeeds and round-trips.
    restored_llm = load_llm(saved_path, allow_dangerous_deserialization=True)
    assert_llm_equality(original_llm, restored_llm)