Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-14 23:26:34 +00:00)
experimental[major]: Force users to opt-in into code that relies on the python repl (#22860)
This makes it obvious that a few of the agents in langchain-experimental rely on the Python REPL as a tool under the hood, and forces users to opt in.
This commit is contained in:
parent 869523ad72
commit ce0b0f22a1
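
For orientation, here is a minimal sketch of what the new opt-in looks like from the caller's side. The model, dataframe, and import paths below are illustrative assumptions, not part of this diff:

# Sketch only: assumes langchain-experimental, langchain-openai, and pandas are
# installed and an OpenAI API key is configured.
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
llm = ChatOpenAI(temperature=0)

# After this change, the call below raises ValueError unless the caller explicitly
# acknowledges that the agent can execute arbitrary Python via its REPL tool.
agent = create_pandas_dataframe_agent(llm, df, allow_dangerous_code=True)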
@@ -168,10 +168,23 @@ def create_pandas_dataframe_agent(
     number_of_head_rows: int = 5,
     extra_tools: Sequence[BaseTool] = (),
     engine: Literal["pandas", "modin"] = "pandas",
+    allow_dangerous_code: bool = False,
     **kwargs: Any,
 ) -> AgentExecutor:
     """Construct a Pandas agent from an LLM and dataframe(s).
 
+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt-in to use this functionality by setting allow_dangerous_code=True.
+
     Args:
         llm: Language model to use for the agent. If agent_type is "tool-calling" then
             llm is expected to support tool calling.

@@ -198,6 +211,16 @@ def create_pandas_dataframe_agent(
             include_df_in_prompt is True.
         extra_tools: Additional tools to give to agent on top of a PythonAstREPLTool.
         engine: One of "modin" or "pandas". Defaults to "pandas".
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+
         **kwargs: DEPRECATED. Not used, kept for backwards compatibility.
 
     Returns:

@@ -221,6 +244,16 @@ def create_pandas_dataframe_agent(
         )
 
     """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True."
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )
     try:
         if engine == "modin":
             import modin.pandas as pd
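
A quick sketch of the failure mode this guard introduces when the flag is left at its default. FakeListLLM and the import paths are assumptions used only to keep the snippet self-contained:

import pandas as pd
from langchain_community.llms.fake import FakeListLLM  # assumed stand-in model
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

llm = FakeListLLM(responses=["Final Answer: done"])
try:
    create_pandas_dataframe_agent(llm, pd.DataFrame())  # no opt-in given
except ValueError as err:
    # The message points the caller at allow_dangerous_code=True and the security docs.
    print(err)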
@@ -42,9 +42,44 @@ def create_spark_dataframe_agent(
     max_execution_time: Optional[float] = None,
     early_stopping_method: str = "force",
     agent_executor_kwargs: Optional[Dict[str, Any]] = None,
+    allow_dangerous_code: bool = False,
     **kwargs: Any,
 ) -> AgentExecutor:
-    """Construct a Spark agent from an LLM and dataframe."""
+    """Construct a Spark agent from an LLM and dataframe.
+
+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt in to use this functionality by setting allow_dangerous_code=True.
+
+    Args:
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+    """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True."
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )
+
     if not _validate_spark_df(df) and not _validate_spark_connect_df(df):
         raise ImportError("Spark is not installed. run `pip install pyspark`.")
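
The Spark agent gets the same guard; a hedged sketch of the opt-in, assuming pyspark is installed and using FakeListLLM purely as a placeholder model:

from pyspark.sql import SparkSession
from langchain_community.llms.fake import FakeListLLM  # placeholder model
from langchain_experimental.agents.agent_toolkits import create_spark_dataframe_agent

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])
llm = FakeListLLM(responses=["Final Answer: done"])

# Omitting allow_dangerous_code=True now raises the ValueError shown above.
agent = create_spark_dataframe_agent(llm=llm, df=df, allow_dangerous_code=True)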
@@ -29,9 +29,45 @@ def create_xorbits_agent(
     max_execution_time: Optional[float] = None,
     early_stopping_method: str = "force",
     agent_executor_kwargs: Optional[Dict[str, Any]] = None,
+    allow_dangerous_code: bool = False,
     **kwargs: Dict[str, Any],
 ) -> AgentExecutor:
-    """Construct a xorbits agent from an LLM and dataframe."""
+    """Construct a xorbits agent from an LLM and dataframe.
+
+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt in to use this functionality by setting allow_dangerous_code=True.
+
+    Args:
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+    """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True."
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )
+
     try:
         from xorbits import numpy as np
         from xorbits import pandas as pd
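
Likewise for the Xorbits agent; a minimal sketch under the assumption that the xorbits package is installed and that the toolkit's existing `data` parameter name is unchanged (neither is shown in this diff):

import xorbits.pandas as xpd
from langchain_community.llms.fake import FakeListLLM  # placeholder model
from langchain_experimental.agents.agent_toolkits import create_xorbits_agent

llm = FakeListLLM(responses=["Final Answer: done"])
data = xpd.DataFrame({"x": [1, 2, 3]})

# As with the pandas and Spark agents, the opt-in flag is now required.
agent = create_xorbits_agent(llm=llm, data=data, allow_dangerous_code=True)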
@@ -18,7 +18,7 @@ from langchain_core.language_models import BaseLanguageModel
 
 from langchain_experimental.pal_chain.colored_object_prompt import COLORED_OBJECT_PROMPT
 from langchain_experimental.pal_chain.math_prompt import MATH_PROMPT
-from langchain_experimental.pydantic_v1 import Extra, Field
+from langchain_experimental.pydantic_v1 import Extra, Field, root_validator
 
 COMMAND_EXECUTION_FUNCTIONS = ["system", "exec", "execfile", "eval", "__import__"]
 COMMAND_EXECUTION_ATTRIBUTES = [
@@ -129,6 +129,36 @@ class PALChain(Chain):
     """Validations to perform on the generated code."""
     timeout: Optional[int] = 10
     """Timeout in seconds for the generated code to execute."""
+    allow_dangerous_code: bool = False
+    """This chain relies on the execution of generated code, which can be dangerous.
+
+    This class implements an AI technique that generates and evaluates
+    Python code, which can be dangerous and requires a specially sandboxed
+    environment to be safely used. While this class implements some basic guardrails
+    by limiting available locals/globals and by parsing and inspecting
+    the generated Python AST using `PALValidation`, those guardrails will not
+    deter sophisticated attackers and are not a replacement for a proper sandbox.
+    Do not use this class on untrusted inputs, with elevated permissions,
+    or without consulting your security team about proper sandboxing!
+
+    Failure to properly sandbox this class can lead to arbitrary code execution
+    vulnerabilities, which can lead to data breaches, data loss, or other security
+    incidents.
+    """
+
+    @root_validator(pre=False, skip_on_failure=True)
+    def post_init(cls, values: Dict) -> Dict:
+        if not values["allow_dangerous_code"]:
+            raise ValueError(
+                "This chain relies on the execution of generated code, "
+                "which can be dangerous. "
+                "Please read the security notice for this class, and only "
+                "use it if you understand the security implications. "
+                "If you want to proceed, you will need to opt-in, by setting "
+                "`allow_dangerous_code` to `True`."
+            )
+
+        return values
 
     class Config:
         """Configuration for this pydantic object."""
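
PALChain enforces the opt-in through the new root_validator rather than a function argument; a sketch, assuming FakeListLLM as a stand-in model that returns a canned program:

from langchain_community.llms.fake import FakeListLLM  # assumed stand-in model
from langchain_experimental.pal_chain import PALChain

canned_program = "def solution():\n    return 4 + 4"
llm = FakeListLLM(responses=[canned_program])

# post_init rejects construction unless allow_dangerous_code=True is passed.
chain = PALChain.from_math_prompt(llm, allow_dangerous_code=True)
print(chain.run("What is 4 plus 4?"))  # executes the generated solution() -> "8"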
@@ -8,7 +8,7 @@ from langchain_experimental.pal_chain.base import PALChain
 def test_math_prompt() -> None:
     """Test math prompt."""
     llm = OpenAI(temperature=0, max_tokens=512)
-    pal_chain = PALChain.from_math_prompt(llm, timeout=None)
+    pal_chain = PALChain.from_math_prompt(llm, timeout=None, allow_dangerous_code=False)
     question = (
         "Jan has three times the number of pets as Marcia. "
         "Marcia has two more pets than Cindy. "

@@ -21,7 +21,9 @@ def test_math_prompt() -> None:
 def test_colored_object_prompt() -> None:
     """Test colored object prompt."""
     llm = OpenAI(temperature=0, max_tokens=512)
-    pal_chain = PALChain.from_colored_object_prompt(llm, timeout=None)
+    pal_chain = PALChain.from_colored_object_prompt(
+        llm, timeout=None, allow_dangerous_code=False
+    )
     question = (
         "On the desk, you see two blue booklets, "
         "two purple booklets, and two yellow pairs of sunglasses. "
@@ -11,5 +11,12 @@ from tests.unit_tests.fake_llm import FakeLLM
 def test_create_pandas_dataframe_agent() -> None:
     import pandas as pd
 
-    create_pandas_dataframe_agent(FakeLLM(), pd.DataFrame())
-    create_pandas_dataframe_agent(FakeLLM(), [pd.DataFrame(), pd.DataFrame()])
+    with pytest.raises(ValueError):
+        create_pandas_dataframe_agent(
+            FakeLLM(), pd.DataFrame(), allow_dangerous_code=False
+        )
+
+    create_pandas_dataframe_agent(FakeLLM(), pd.DataFrame(), allow_dangerous_code=True)
+    create_pandas_dataframe_agent(
+        FakeLLM(), [pd.DataFrame(), pd.DataFrame()], allow_dangerous_code=True
+    )
@@ -189,7 +189,9 @@ def test_math_question_1() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_1}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "8"
 

@@ -202,7 +204,9 @@ def test_math_question_2() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_2}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "33"
 

@@ -214,7 +218,9 @@ def test_math_question_3() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_3}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     with pytest.raises(ValueError) as exc_info:
         fake_pal_chain.run(question)
     assert (

@@ -231,7 +237,9 @@ def test_math_question_infinite_loop() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_INFINITE_LOOP}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=1)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=1, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "Execution timed out"
 

@@ -245,7 +253,9 @@ def test_color_question_1() -> None:
     prompt = COLORED_OBJECT_PROMPT.format(question=question)
     queries = {prompt: _COLORED_OBJECT_SOLUTION_1}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_colored_object_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_colored_object_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "0"
 

@@ -260,7 +270,9 @@ def test_color_question_2() -> None:
     prompt = COLORED_OBJECT_PROMPT.format(question=question)
     queries = {prompt: _COLORED_OBJECT_SOLUTION_2}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_colored_object_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_colored_object_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "brown"