Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-14 23:26:34 +00:00)
experimental[major]: Force users to opt-in into code that relies on the python repl (#22860)
This makes it obvious that a few of the agents in langchain-experimental rely on the Python REPL as a tool under the hood, and forces users to opt in.
This commit is contained in:
parent 869523ad72
commit ce0b0f22a1
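
For orientation, here is a minimal sketch of what the new opt-in looks like from the caller's side. The model, dataframe, and import paths below are illustrative assumptions, not part of this diff:

# Sketch only: assumes langchain-experimental, langchain-openai, and pandas are
# installed and an OpenAI API key is configured.
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
llm = ChatOpenAI(temperature=0)

# After this change, the call below raises ValueError unless the caller explicitly
# acknowledges that the agent can execute arbitrary Python via its REPL tool.
agent = create_pandas_dataframe_agent(llm, df, allow_dangerous_code=True)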
@@ -168,10 +168,23 @@ def create_pandas_dataframe_agent(
     number_of_head_rows: int = 5,
     extra_tools: Sequence[BaseTool] = (),
     engine: Literal["pandas", "modin"] = "pandas",
+    allow_dangerous_code: bool = False,
     **kwargs: Any,
 ) -> AgentExecutor:
     """Construct a Pandas agent from an LLM and dataframe(s).
 
+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt-in to use this functionality by setting allow_dangerous_code=True.
+
     Args:
         llm: Language model to use for the agent. If agent_type is "tool-calling" then
             llm is expected to support tool calling.

@@ -198,6 +211,16 @@ def create_pandas_dataframe_agent(
             include_df_in_prompt is True.
         extra_tools: Additional tools to give to agent on top of a PythonAstREPLTool.
         engine: One of "modin" or "pandas". Defaults to "pandas".
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+
         **kwargs: DEPRECATED. Not used, kept for backwards compatibility.
 
     Returns:

@@ -221,6 +244,16 @@ def create_pandas_dataframe_agent(
         )
 
     """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True."
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )
     try:
         if engine == "modin":
             import modin.pandas as pd
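
A quick sketch of the failure mode this guard introduces when the flag is left at its default. FakeListLLM and the import paths are assumptions used only to keep the snippet self-contained:

import pandas as pd
from langchain_community.llms.fake import FakeListLLM  # assumed stand-in model
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

llm = FakeListLLM(responses=["Final Answer: done"])
try:
    create_pandas_dataframe_agent(llm, pd.DataFrame())  # no opt-in given
except ValueError as err:
    # The message points the caller at allow_dangerous_code=True and the security docs.
    print(err)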
@@ -42,9 +42,44 @@ def create_spark_dataframe_agent(
     max_execution_time: Optional[float] = None,
     early_stopping_method: str = "force",
     agent_executor_kwargs: Optional[Dict[str, Any]] = None,
+    allow_dangerous_code: bool = False,
     **kwargs: Any,
 ) -> AgentExecutor:
-    """Construct a Spark agent from an LLM and dataframe."""
+    """Construct a Spark agent from an LLM and dataframe.
+
+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt in to use this functionality by setting allow_dangerous_code=True.
+
+    Args:
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+    """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True."
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )
+
     if not _validate_spark_df(df) and not _validate_spark_connect_df(df):
         raise ImportError("Spark is not installed. run `pip install pyspark`.")
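
The Spark agent gets the same guard; a hedged sketch of the opt-in, assuming pyspark is installed and using FakeListLLM purely as a placeholder model:

from pyspark.sql import SparkSession
from langchain_community.llms.fake import FakeListLLM  # placeholder model
from langchain_experimental.agents.agent_toolkits import create_spark_dataframe_agent

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])
llm = FakeListLLM(responses=["Final Answer: done"])

# Omitting allow_dangerous_code=True now raises the ValueError shown above.
agent = create_spark_dataframe_agent(llm=llm, df=df, allow_dangerous_code=True)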
@@ -29,9 +29,45 @@ def create_xorbits_agent(
     max_execution_time: Optional[float] = None,
     early_stopping_method: str = "force",
     agent_executor_kwargs: Optional[Dict[str, Any]] = None,
+    allow_dangerous_code: bool = False,
     **kwargs: Dict[str, Any],
 ) -> AgentExecutor:
-    """Construct a xorbits agent from an LLM and dataframe."""
+    """Construct a xorbits agent from an LLM and dataframe.
+
+    Security Notice:
+        This agent relies on access to a python repl tool which can execute
+        arbitrary code. This can be dangerous and requires a specially sandboxed
+        environment to be safely used. Failure to run this code in a properly
+        sandboxed environment can lead to arbitrary code execution vulnerabilities,
+        which can lead to data breaches, data loss, or other security incidents.
+
+        Do not use this code with untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
+
+        You must opt in to use this functionality by setting allow_dangerous_code=True.
+
+    Args:
+        allow_dangerous_code: bool, default False
+            This agent relies on access to a python repl tool which can execute
+            arbitrary code. This can be dangerous and requires a specially sandboxed
+            environment to be safely used.
+            Failure to properly sandbox this class can lead to arbitrary code execution
+            vulnerabilities, which can lead to data breaches, data loss, or
+            other security incidents.
+            You must opt in to use this functionality by setting
+            allow_dangerous_code=True.
+    """
+    if not allow_dangerous_code:
+        raise ValueError(
+            "This agent relies on access to a python repl tool which can execute "
+            "arbitrary code. This can be dangerous and requires a specially sandboxed "
+            "environment to be safely used. Please read the security notice in the "
+            "doc-string of this function. You must opt-in to use this functionality "
+            "by setting allow_dangerous_code=True."
+            "For general security guidelines, please see: "
+            "https://python.langchain.com/v0.1/docs/security/"
+        )
+
     try:
         from xorbits import numpy as np
         from xorbits import pandas as pd
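
Likewise for the Xorbits agent; a minimal sketch under the assumption that the xorbits package is installed and that the toolkit's existing `data` parameter name is unchanged (neither is shown in this diff):

import xorbits.pandas as xpd
from langchain_community.llms.fake import FakeListLLM  # placeholder model
from langchain_experimental.agents.agent_toolkits import create_xorbits_agent

llm = FakeListLLM(responses=["Final Answer: done"])
data = xpd.DataFrame({"x": [1, 2, 3]})

# As with the pandas and Spark agents, the opt-in flag is now required.
agent = create_xorbits_agent(llm=llm, data=data, allow_dangerous_code=True)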
@@ -18,7 +18,7 @@ from langchain_core.language_models import BaseLanguageModel
 
 from langchain_experimental.pal_chain.colored_object_prompt import COLORED_OBJECT_PROMPT
 from langchain_experimental.pal_chain.math_prompt import MATH_PROMPT
-from langchain_experimental.pydantic_v1 import Extra, Field
+from langchain_experimental.pydantic_v1 import Extra, Field, root_validator
 
 COMMAND_EXECUTION_FUNCTIONS = ["system", "exec", "execfile", "eval", "__import__"]
 COMMAND_EXECUTION_ATTRIBUTES = [
@@ -129,6 +129,36 @@ class PALChain(Chain):
     """Validations to perform on the generated code."""
     timeout: Optional[int] = 10
     """Timeout in seconds for the generated code to execute."""
+    allow_dangerous_code: bool = False
+    """This chain relies on the execution of generated code, which can be dangerous.
+
+    This class implements an AI technique that generates and evaluates
+    Python code, which can be dangerous and requires a specially sandboxed
+    environment to be safely used. While this class implements some basic guardrails
+    by limiting available locals/globals and by parsing and inspecting
+    the generated Python AST using `PALValidation`, those guardrails will not
+    deter sophisticated attackers and are not a replacement for a proper sandbox.
+    Do not use this class on untrusted inputs, with elevated permissions,
+    or without consulting your security team about proper sandboxing!
+
+    Failure to properly sandbox this class can lead to arbitrary code execution
+    vulnerabilities, which can lead to data breaches, data loss, or other security
+    incidents.
+    """
+
+    @root_validator(pre=False, skip_on_failure=True)
+    def post_init(cls, values: Dict) -> Dict:
+        if not values["allow_dangerous_code"]:
+            raise ValueError(
+                "This chain relies on the execution of generated code, "
+                "which can be dangerous. "
+                "Please read the security notice for this class, and only "
+                "use it if you understand the security implications. "
+                "If you want to proceed, you will need to opt-in, by setting "
+                "`allow_dangerous_code` to `True`."
+            )
+
+        return values
 
     class Config:
         """Configuration for this pydantic object."""
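
PALChain enforces the opt-in through the new root_validator rather than a function argument; a sketch, assuming FakeListLLM as a stand-in model that returns a canned program:

from langchain_community.llms.fake import FakeListLLM  # assumed stand-in model
from langchain_experimental.pal_chain import PALChain

canned_program = "def solution():\n    return 4 + 4"
llm = FakeListLLM(responses=[canned_program])

# post_init rejects construction unless allow_dangerous_code=True is passed.
chain = PALChain.from_math_prompt(llm, allow_dangerous_code=True)
print(chain.run("What is 4 plus 4?"))  # executes the generated solution() -> "8"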
@@ -8,7 +8,7 @@ from langchain_experimental.pal_chain.base import PALChain
 def test_math_prompt() -> None:
     """Test math prompt."""
     llm = OpenAI(temperature=0, max_tokens=512)
-    pal_chain = PALChain.from_math_prompt(llm, timeout=None)
+    pal_chain = PALChain.from_math_prompt(llm, timeout=None, allow_dangerous_code=False)
     question = (
         "Jan has three times the number of pets as Marcia. "
         "Marcia has two more pets than Cindy. "

@@ -21,7 +21,9 @@ def test_math_prompt() -> None:
 def test_colored_object_prompt() -> None:
     """Test colored object prompt."""
     llm = OpenAI(temperature=0, max_tokens=512)
-    pal_chain = PALChain.from_colored_object_prompt(llm, timeout=None)
+    pal_chain = PALChain.from_colored_object_prompt(
+        llm, timeout=None, allow_dangerous_code=False
+    )
     question = (
         "On the desk, you see two blue booklets, "
         "two purple booklets, and two yellow pairs of sunglasses. "
@@ -11,5 +11,12 @@ from tests.unit_tests.fake_llm import FakeLLM
 def test_create_pandas_dataframe_agent() -> None:
     import pandas as pd
 
-    create_pandas_dataframe_agent(FakeLLM(), pd.DataFrame())
-    create_pandas_dataframe_agent(FakeLLM(), [pd.DataFrame(), pd.DataFrame()])
+    with pytest.raises(ValueError):
+        create_pandas_dataframe_agent(
+            FakeLLM(), pd.DataFrame(), allow_dangerous_code=False
+        )
+
+    create_pandas_dataframe_agent(FakeLLM(), pd.DataFrame(), allow_dangerous_code=True)
+    create_pandas_dataframe_agent(
+        FakeLLM(), [pd.DataFrame(), pd.DataFrame()], allow_dangerous_code=True
+    )
@@ -189,7 +189,9 @@ def test_math_question_1() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_1}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "8"
 

@@ -202,7 +204,9 @@ def test_math_question_2() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_2}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "33"
 

@@ -214,7 +218,9 @@ def test_math_question_3() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_3}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     with pytest.raises(ValueError) as exc_info:
         fake_pal_chain.run(question)
     assert (

@@ -231,7 +237,9 @@ def test_math_question_infinite_loop() -> None:
     prompt = MATH_PROMPT.format(question=question)
     queries = {prompt: _MATH_SOLUTION_INFINITE_LOOP}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_math_prompt(fake_llm, timeout=1)
+    fake_pal_chain = PALChain.from_math_prompt(
+        fake_llm, timeout=1, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "Execution timed out"
 

@@ -245,7 +253,9 @@ def test_color_question_1() -> None:
     prompt = COLORED_OBJECT_PROMPT.format(question=question)
     queries = {prompt: _COLORED_OBJECT_SOLUTION_1}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_colored_object_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_colored_object_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "0"
 

@@ -260,7 +270,9 @@ def test_color_question_2() -> None:
     prompt = COLORED_OBJECT_PROMPT.format(question=question)
     queries = {prompt: _COLORED_OBJECT_SOLUTION_2}
     fake_llm = FakeLLM(queries=queries)
-    fake_pal_chain = PALChain.from_colored_object_prompt(fake_llm, timeout=None)
+    fake_pal_chain = PALChain.from_colored_object_prompt(
+        fake_llm, timeout=None, allow_dangerous_code=True
+    )
     output = fake_pal_chain.run(question)
     assert output == "brown"