Mirror of https://github.com/hwchase17/langchain.git, synced 2025-08-14 15:16:21 +00:00
experimental[major]: Force users to opt-in into code that relies on the python repl (#22860)
This should make it obvious that a few of the agents in langchain experimental rely on the Python REPL as a tool under the hood, and will force users to opt in.
commit ce0b0f22a1
parent 869523ad72
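For orientation, here is a minimal sketch of what the opt-in looks like from the caller's side after this change. The `ChatOpenAI` model is an assumption for illustration; any LangChain-compatible LLM would be passed the same way.

```python
import pandas as pd

from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI  # assumption: any LangChain LLM works here

df = pd.DataFrame({"country": ["US", "FR"], "population": [331, 67]})
llm = ChatOpenAI(temperature=0)

# The agent hands model-generated Python to a REPL tool, so construction
# now requires explicitly acknowledging that risk:
agent = create_pandas_dataframe_agent(llm, df, allow_dangerous_code=True)
```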
@@ -168,10 +168,23 @@ def create_pandas_dataframe_agent(
     number_of_head_rows: int = 5,
     extra_tools: Sequence[BaseTool] = (),
     engine: Literal["pandas", "modin"] = "pandas",
+    allow_dangerous_code: bool = False,
     **kwargs: Any,
 ) -> AgentExecutor:
     """Construct a Pandas agent from an LLM and dataframe(s).

+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt-in to use this functionality by setting allow_dangerous_code=True.
+
     Args:
         llm: Language model to use for the agent. If agent_type is "tool-calling" then
             llm is expected to support tool calling.
@@ -198,6 +211,16 @@ def create_pandas_dataframe_agent(
             include_df_in_prompt is True.
         extra_tools: Additional tools to give to agent on top of a PythonAstREPLTool.
         engine: One of "modin" or "pandas". Defaults to "pandas".
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+
         **kwargs: DEPRECATED. Not used, kept for backwards compatibility.

     Returns:
@@ -221,6 +244,16 @@ def create_pandas_dataframe_agent(
         )

     """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True. "
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )
     try:
         if engine == "modin":
             import modin.pandas as pd
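The guard is a plain `ValueError` at construction time, so existing callers fail fast instead of silently gaining REPL access. A sketch of the failure path, using `FakeListLLM` as a stand-in model (an assumption, not part of this diff):

```python
import pandas as pd

from langchain_community.llms import FakeListLLM  # stand-in model (assumption)
from langchain_experimental.agents import create_pandas_dataframe_agent

try:
    # allow_dangerous_code defaults to False, so this raises immediately.
    create_pandas_dataframe_agent(FakeListLLM(responses=["unused"]), pd.DataFrame())
except ValueError as err:
    print(err)  # the message points at the security notice and the opt-in flag
```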
@@ -42,9 +42,44 @@ def create_spark_dataframe_agent(
     max_execution_time: Optional[float] = None,
     early_stopping_method: str = "force",
     agent_executor_kwargs: Optional[Dict[str, Any]] = None,
+    allow_dangerous_code: bool = False,
     **kwargs: Any,
 ) -> AgentExecutor:
-    """Construct a Spark agent from an LLM and dataframe."""
+    """Construct a Spark agent from an LLM and dataframe.
+
+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt in to use this functionality by setting allow_dangerous_code=True.
+
+    Args:
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+    """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True. "
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )

     if not _validate_spark_df(df) and not _validate_spark_connect_df(df):
         raise ImportError("Spark is not installed. run `pip install pyspark`.")
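The Spark factory gains the identical guard, so the opt-in reads the same way. A sketch assuming a local `pyspark` install and the same stand-in model:

```python
from pyspark.sql import SparkSession

from langchain_community.llms import FakeListLLM  # stand-in model (assumption)
from langchain_experimental.agents import create_spark_dataframe_agent

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])

# Same pattern as the pandas agent: construction fails unless the caller opts in.
agent = create_spark_dataframe_agent(
    FakeListLLM(responses=["unused"]), df, allow_dangerous_code=True
)
```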
@@ -29,9 +29,45 @@ def create_xorbits_agent(
     max_execution_time: Optional[float] = None,
     early_stopping_method: str = "force",
     agent_executor_kwargs: Optional[Dict[str, Any]] = None,
+    allow_dangerous_code: bool = False,
     **kwargs: Dict[str, Any],
 ) -> AgentExecutor:
-    """Construct a xorbits agent from an LLM and dataframe."""
+    """Construct a xorbits agent from an LLM and dataframe.
+
+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt in to use this functionality by setting allow_dangerous_code=True.
+
+    Args:
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+    """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True. "
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )
+
     try:
         from xorbits import numpy as np
         from xorbits import pandas as pd
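All three factories repeat the same guard clause verbatim. If you maintain a similar REPL-backed entry point, the pattern is straightforward to reuse; the sketch below is generic, and its names are illustrative rather than part of this diff:

```python
from typing import Any


def create_my_repl_agent(llm: Any, df: Any, *, allow_dangerous_code: bool = False) -> Any:
    """Hypothetical factory guarded the same way as the agents above."""
    if not allow_dangerous_code:
        # Fail before any tool is wired up, and name the exact flag that
        # acknowledges the risk of arbitrary code execution.
        raise ValueError(
            "This agent can execute arbitrary code via a Python REPL tool. "
            "Run it only in a sandboxed environment, and opt in by setting "
            "allow_dangerous_code=True."
        )
    ...  # build and return the AgentExecutor here
```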
@@ -18,7 +18,7 @@ from langchain_core.language_models import BaseLanguageModel

 from langchain_experimental.pal_chain.colored_object_prompt import COLORED_OBJECT_PROMPT
 from langchain_experimental.pal_chain.math_prompt import MATH_PROMPT
-from langchain_experimental.pydantic_v1 import Extra, Field
+from langchain_experimental.pydantic_v1 import Extra, Field, root_validator

 COMMAND_EXECUTION_FUNCTIONS = ["system", "exec", "execfile", "eval", "__import__"]
 COMMAND_EXECUTION_ATTRIBUTES = [
@@ -129,6 +129,36 @@ class PALChain(Chain):
     """Validations to perform on the generated code."""
     timeout: Optional[int] = 10
     """Timeout in seconds for the generated code to execute."""
+    allow_dangerous_code: bool = False
+    """This chain relies on the execution of generated code, which can be dangerous.
+
+    This class implements an AI technique that generates and evaluates
+    Python code, which can be dangerous and requires a specially sandboxed
+    environment to be safely used. While this class implements some basic guardrails
+    by limiting available locals/globals and by parsing and inspecting
+    the generated Python AST using `PALValidation`, those guardrails will not
+    deter sophisticated attackers and are not a replacement for a proper sandbox.
+    Do not use this class on untrusted inputs, with elevated permissions,
+    or without consulting your security team about proper sandboxing!
+
+    Failure to properly sandbox this class can lead to arbitrary code execution
+    vulnerabilities, which can lead to data breaches, data loss, or other security
+    incidents.
+    """
+
+    @root_validator(pre=False, skip_on_failure=True)
+    def post_init(cls, values: Dict) -> Dict:
+        if not values["allow_dangerous_code"]:
+            raise ValueError(
+                "This chain relies on the execution of generated code, "
+                "which can be dangerous. "
+                "Please read the security notice for this class, and only "
+                "use it if you understand the security implications. "
+                "If you want to proceed, you will need to opt-in, by setting "
+                "`allow_dangerous_code` to `True`."
+            )
+
+        return values

     class Config:
         """Configuration for this pydantic object."""
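Because `PALChain` is a pydantic model rather than a factory function, the check runs in a `root_validator`, but the effect is the same: instantiation fails without the flag. A sketch using the `from_math_prompt` classmethod, with `FakeListLLM` as a stand-in model (an assumption):

```python
from langchain_community.llms import FakeListLLM  # stand-in model (assumption)
from langchain_experimental.pal_chain import PALChain

llm = FakeListLLM(responses=["def solution():\n    return 8\n"])

# PALChain.from_math_prompt(llm) would raise ValueError here: the
# root_validator rejects the default allow_dangerous_code=False.
chain = PALChain.from_math_prompt(llm, allow_dangerous_code=True)

# The opt-in does not disable the existing guardrails: generated code still
# passes through PALValidation and the restricted globals/locals.
```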
@@ -8,7 +8,7 @@ from langchain_experimental.pal_chain.base import PALChain
 def test_math_prompt() -> None:
     """Test math prompt."""
     llm = OpenAI(temperature=0, max_tokens=512)
-    pal_chain = PALChain.from_math_prompt(llm, timeout=None)
+    pal_chain = PALChain.from_math_prompt(llm, timeout=None, allow_dangerous_code=False)
     question = (
         "Jan has three times the number of pets as Marcia. "
         "Marcia has two more pets than Cindy. "
@@ -21,7 +21,9 @@ def test_math_prompt() -> None:
 def test_colored_object_prompt() -> None:
     """Test colored object prompt."""
     llm = OpenAI(temperature=0, max_tokens=512)
-    pal_chain = PALChain.from_colored_object_prompt(llm, timeout=None)
+    pal_chain = PALChain.from_colored_object_prompt(
+        llm, timeout=None, allow_dangerous_code=False
+    )
     question = (
         "On the desk, you see two blue booklets, "
         "two purple booklets, and two yellow pairs of sunglasses. "
@@ -11,5 +11,12 @@ from tests.unit_tests.fake_llm import FakeLLM
 def test_create_pandas_dataframe_agent() -> None:
     import pandas as pd

-    create_pandas_dataframe_agent(FakeLLM(), pd.DataFrame())
-    create_pandas_dataframe_agent(FakeLLM(), [pd.DataFrame(), pd.DataFrame()])
+    with pytest.raises(ValueError):
+        create_pandas_dataframe_agent(
+            FakeLLM(), pd.DataFrame(), allow_dangerous_code=False
+        )
+
+    create_pandas_dataframe_agent(FakeLLM(), pd.DataFrame(), allow_dangerous_code=True)
+    create_pandas_dataframe_agent(
+        FakeLLM(), [pd.DataFrame(), pd.DataFrame()], allow_dangerous_code=True
+    )
@@ -189,7 +189,9 @@ def test_math_question_1() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_1}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "8"

@@ -202,7 +204,9 @@ def test_math_question_2() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_2}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "33"

@@ -214,7 +218,9 @@ def test_math_question_3() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_3}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     with pytest.raises(ValueError) as exc_info:
         fake_pal_chain.run(question)
     assert (
@@ -231,7 +237,9 @@ def test_math_question_infinite_loop() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_INFINITE_LOOP}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=1)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=1, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "Execution timed out"

@@ -245,7 +253,9 @@ def test_color_question_1() -> None:
     prompt = COLORED_OBJECT_PROMPT.format(question=question)
     queries = {prompt: _COLORED_OBJECT_SOLUTION_1}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_colored_object_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_colored_object_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "0"

@@ -260,7 +270,9 @@ def test_color_question_2() -> None:
     prompt = COLORED_OBJECT_PROMPT.format(question=question)
     queries = {prompt: _COLORED_OBJECT_SOLUTION_2}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_colored_object_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_colored_object_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "brown"