From b5cd1e0fed13c227171903e89d327b9c9d482a49 Mon Sep 17 00:00:00 2001 From: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> Date: Tue, 29 Aug 2023 13:51:56 -0400 Subject: [PATCH] Add security notices on PAL and CPAL experimental chains. (#9938) Clearly document that the PAL and CPAL techniques involve generating code, and that such code must be properly sandboxed and given appropriate narrowly-scoped credentials in order to ensure security. While our implementations include some mitigations, Python and SQL sandboxing is well-known to be a very hard problem and our mitigations are no replacement for proper sandboxing and permissions management. The implementation of such techniques must be performed outside the scope of the Python process where this package's code runs, so its correct setup and administration must therefore be the responsibility of the user of this code. --- .../langchain_experimental/cpal/base.py | 35 +++++++++++++++++-- .../langchain_experimental/pal_chain/base.py | 9 +++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/libs/experimental/langchain_experimental/cpal/base.py b/libs/experimental/langchain_experimental/cpal/base.py index bba59d5d32a..4ee817f357e 100644 --- a/libs/experimental/langchain_experimental/cpal/base.py +++ b/libs/experimental/langchain_experimental/cpal/base.py @@ -131,13 +131,34 @@ class InterventionChain(_BaseStoryElementChain): class QueryChain(_BaseStoryElementChain): - """Query the outcome table using SQL.""" + """Query the outcome table using SQL. + + *Security note*: This class implements an AI technique that generates SQL code. + If those SQL commands are executed, it's critical to ensure they use credentials + that are narrowly-scoped to only include the permissions this chain needs. + Failure to do so may result in data corruption or loss, since this chain may + attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this chain. + """ pydantic_model: ClassVar[Type[pydantic.BaseModel]] = QueryModel template: ClassVar[str] = query_template # TODO: incl. table schema class CPALChain(_BaseStoryElementChain): + """Causal program-aided language (CPAL) chain implementation. + + *Security note*: The building blocks of this class include the implementation + of an AI technique that generates SQL code. If those SQL commands + are executed, it's critical to ensure they use credentials that + are narrowly-scoped to only include the permissions this chain needs. + Failure to do so may result in data corruption or loss, since this chain may + attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this chain. + """ + llm: BaseLanguageModel narrative_chain: Optional[NarrativeChain] = None causal_chain: Optional[CausalChain] = None @@ -151,7 +172,17 @@ class CPALChain(_BaseStoryElementChain): llm: BaseLanguageModel, **kwargs: Any, ) -> CPALChain: - """instantiation depends on component chains""" + """instantiation depends on component chains + + *Security note*: The building blocks of this class include the implementation + of an AI technique that generates SQL code. If those SQL commands + are executed, it's critical to ensure they use credentials that + are narrowly-scoped to only include the permissions this chain needs. + Failure to do so may result in data corruption or loss, since this chain may + attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this chain. + """ return cls( llm=llm, chain=LLMChain( diff --git a/libs/experimental/langchain_experimental/pal_chain/base.py b/libs/experimental/langchain_experimental/pal_chain/base.py index 6717c9f4a13..275f9e93087 100644 --- a/libs/experimental/langchain_experimental/pal_chain/base.py +++ b/libs/experimental/langchain_experimental/pal_chain/base.py @@ -90,6 +90,15 @@ class PALChain(Chain): This class implements the Program-Aided Language Models (PAL) for generating code solutions. PAL is a technique described in the paper "Program-Aided Language Models" (https://arxiv.org/pdf/2211.10435.pdf). + + *Security note*: This class implements an AI technique that generates and evaluates + Python code, which can be dangerous and requires a specially sandboxed + environment to be safely used. While this class implements some basic guardrails + by limiting available locals/globals and by parsing and inspecting + the generated Python AST using `PALValidation`, those guardrails will not + deter sophisticated attackers and are not a replacement for a proper sandbox. + Do not use this class on untrusted inputs, with elevated permissions, + or without consulting your security team about proper sandboxing! """ llm_chain: LLMChain