community[major]: breaking change in some APIs to force users to opt-in for pickling (#18696)

This PR adds a "dangerous load" opt-in parameter that forces users to explicitly opt in before pickle is used for deserialization. It is meant to raise user awareness that the pickle module is involved.
2025-09-04 12:39:32 +00:00 · 2024-03-06 16:43:01 -05:00
parent 0e52961562
commit 4c25b49229
10 changed files with 128 additions and 7 deletions
--- a/libs/community/langchain_community/llms/self_hosted.py
+++ b/libs/community/langchain_community/llms/self_hosted.py
@@ -137,6 +137,11 @@ class SelfHostedPipeline(LLM):
    model_reqs: List[str] = ["./", "torch"]
    """Requirements to install on hardware to inference the model."""

+    allow_dangerous_deserialization: bool = False
+    """Allow deserialization using pickle which can be dangerous if 
+    loading compromised data.
+    """
+
    class Config:
        """Configuration for this pydantic object."""

@@ -149,6 +154,16 @@ class SelfHostedPipeline(LLM):
        and run on the server, i.e. in a module and not a REPL or closure.
        Then, initialize the remote inference function.
        """
+        if not kwargs.get("allow_dangerous_deserialization"):
+            raise ValueError(
+                "SelfHostedPipeline relies on the pickle module. "
+                "You will need to set allow_dangerous_deserialization=True "
+                "if you want to opt-in to allow deserialization of data using pickle."
+                "Data can be compromised by a malicious actor if "
+                "not handled properly to include "
+                "a malicious payload that when deserialized with "
+                "pickle can execute arbitrary code. "
+            )
        super().__init__(**kwargs)
        try:
            import runhouse as rh