Merge branch 'langchain-ai:master' into master

This commit is contained in:
olgavrou
2023-09-01 04:10:49 -04:00
committed by GitHub
23 changed files with 563 additions and 261 deletions

View File

@@ -76,6 +76,7 @@ lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/langchain --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint lint_diff:
./scripts/check_pydantic.sh .
poetry run ruff .
poetry run black $(PYTHON_FILES) --check
poetry run mypy $(PYTHON_FILES)

View File

@@ -120,6 +120,7 @@ class Run(BaseRunV2):
ChainRun.update_forward_refs()
ToolRun.update_forward_refs()
Run.update_forward_refs()
__all__ = [
"BaseRun",

View File

@@ -11,7 +11,7 @@ from langchain.prompts.prompt import PromptTemplate
DEFAULT_REFINE_PROMPT_TMPL = (
"The original question is as follows: {question}\n"
"We have provided an existing answer: {existing_answer}\n"
"We have the opportunity to refine the existing answer"
"We have the opportunity to refine the existing answer "
"(only if needed) with some more context below.\n"
"------------\n"
"{context_str}\n"
@@ -20,12 +20,10 @@ DEFAULT_REFINE_PROMPT_TMPL = (
"answer the question. "
"If the context isn't useful, return the original answer."
)
DEFAULT_REFINE_PROMPT = PromptTemplate(
input_variables=["question", "existing_answer", "context_str"],
template=DEFAULT_REFINE_PROMPT_TMPL,
)
DEFAULT_REFINE_PROMPT = PromptTemplate.from_template(DEFAULT_REFINE_PROMPT_TMPL)
refine_template = (
"We have the opportunity to refine the existing answer"
"We have the opportunity to refine the existing answer "
"(only if needed) with some more context below.\n"
"------------\n"
"{context_str}\n"
@@ -34,12 +32,9 @@ refine_template = (
"answer the question. "
"If the context isn't useful, return the original answer."
)
messages = [
HumanMessagePromptTemplate.from_template("{question}"),
AIMessagePromptTemplate.from_template("{existing_answer}"),
HumanMessagePromptTemplate.from_template(refine_template),
]
CHAT_REFINE_PROMPT = ChatPromptTemplate.from_messages(messages)
CHAT_REFINE_PROMPT = ChatPromptTemplate.from_messages(
[("human", "{question}"), ("ai", "{existing_answer}"), ("human", "refine_template")]
)
REFINE_PROMPT_SELECTOR = ConditionalPromptSelector(
default_prompt=DEFAULT_REFINE_PROMPT,
conditionals=[(is_chat_model, CHAT_REFINE_PROMPT)],
@@ -48,28 +43,25 @@ REFINE_PROMPT_SELECTOR = ConditionalPromptSelector(
DEFAULT_TEXT_QA_PROMPT_TMPL = (
"Context information is below. \n"
"---------------------\n"
"{context_str}"
"\n---------------------\n"
"------------\n"
"{context_str}\n"
"------------\n"
"Given the context information and not prior knowledge, "
"answer the question: {question}\n"
)
DEFAULT_TEXT_QA_PROMPT = PromptTemplate(
input_variables=["context_str", "question"], template=DEFAULT_TEXT_QA_PROMPT_TMPL
)
DEFAULT_TEXT_QA_PROMPT = PromptTemplate.from_template(DEFAULT_TEXT_QA_PROMPT_TMPL)
chat_qa_prompt_template = (
"Context information is below. \n"
"---------------------\n"
"{context_str}"
"\n---------------------\n"
"Context information is below.\n"
"------------\n"
"{context_str}\n"
"------------\n"
"Given the context information and not prior knowledge, "
"answer any questions"
)
messages = [
SystemMessagePromptTemplate.from_template(chat_qa_prompt_template),
HumanMessagePromptTemplate.from_template("{question}"),
]
CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(
[("system", chat_qa_prompt_template), ("human", "{question}")]
)
QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector(
default_prompt=DEFAULT_TEXT_QA_PROMPT,
conditionals=[(is_chat_model, CHAT_QUESTION_PROMPT)],

View File

@@ -1,21 +1,16 @@
# flake8: noqa
from langchain.prompts import PromptTemplate
REFINE_PROMPT_TMPL = (
"Your job is to produce a final summary\n"
"We have provided an existing summary up to a certain point: {existing_answer}\n"
"We have the opportunity to refine the existing summary"
"(only if needed) with some more context below.\n"
"------------\n"
"{text}\n"
"------------\n"
"Given the new context, refine the original summary\n"
"If the context isn't useful, return the original summary."
)
REFINE_PROMPT = PromptTemplate(
input_variables=["existing_answer", "text"],
template=REFINE_PROMPT_TMPL,
)
REFINE_PROMPT_TMPL = """\
Your job is to produce a final summary.
We have provided an existing summary up to a certain point: {existing_answer}
We have the opportunity to refine the existing summary (only if needed) with some more context below.
------------
{text}
------------
Given the new context, refine the original summary.
If the context isn't useful, return the original summary.\
""" # noqa: E501
REFINE_PROMPT = PromptTemplate.from_template(REFINE_PROMPT_TMPL)
prompt_template = """Write a concise summary of the following:
@@ -25,4 +20,4 @@ prompt_template = """Write a concise summary of the following:
CONCISE SUMMARY:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
PROMPT = PromptTemplate.from_template(prompt_template)

View File

@@ -332,9 +332,9 @@ def index(
uids_to_delete = record_manager.list_keys(before=index_start_dt)
if uids_to_delete:
# Then delete from vector store.
vector_store.delete(uids_to_delete)
# First delete from record store.
vector_store.delete(uids_to_delete)
# Then delete from record manager.
record_manager.delete_keys(uids_to_delete)
num_deleted = len(uids_to_delete)

View File

@@ -14,10 +14,12 @@ allow it to work with a variety of SQL as a backend.
* Keys can be deleted.
"""
import contextlib
import decimal
import uuid
from typing import Any, Dict, Generator, List, Optional, Sequence
from typing import Any, Dict, Generator, List, Optional, Sequence, Union
from sqlalchemy import (
URL,
Column,
Engine,
Float,
@@ -28,7 +30,6 @@ from sqlalchemy import (
create_engine,
text,
)
from sqlalchemy.dialects.sqlite import insert
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, sessionmaker
@@ -77,7 +78,7 @@ class SQLRecordManager(RecordManager):
namespace: str,
*,
engine: Optional[Engine] = None,
db_url: Optional[str] = None,
db_url: Union[None, str, URL] = None,
engine_kwargs: Optional[Dict[str, Any]] = None,
) -> None:
"""Initialize the SQLRecordManager.
@@ -114,6 +115,7 @@ class SQLRecordManager(RecordManager):
raise AssertionError("Something went wrong with configuration of engine.")
self.engine = _engine
self.dialect = _engine.dialect.name
self.session_factory = sessionmaker(bind=self.engine)
def create_schema(self) -> None:
@@ -145,8 +147,16 @@ class SQLRecordManager(RecordManager):
# 2440587.5 - constant represents the Julian day number for January 1, 1970
# 86400.0 - constant represents the number of seconds
# in a day (24 hours * 60 minutes * 60 seconds)
query = text("SELECT (julianday('now') - 2440587.5) * 86400.0;")
if self.dialect == "sqlite":
query = text("SELECT (julianday('now') - 2440587.5) * 86400.0;")
elif self.dialect == "postgresql":
query = text("SELECT EXTRACT (EPOCH FROM CURRENT_TIMESTAMP);")
else:
raise NotImplementedError(f"Not implemented for dialect {self.dialect}")
dt = session.execute(query).scalar()
if isinstance(dt, decimal.Decimal):
dt = float(dt)
if not isinstance(dt, float):
raise AssertionError(f"Unexpected type for datetime: {type(dt)}")
return dt
@@ -192,17 +202,37 @@ class SQLRecordManager(RecordManager):
]
with self._make_session() as session:
# Note: uses SQLite insert to make on_conflict_do_update work.
# This code needs to be generalized a bit to work with more dialects.
insert_stmt = insert(UpsertionRecord).values(records_to_upsert)
stmt = insert_stmt.on_conflict_do_update( # type: ignore[attr-defined]
[UpsertionRecord.key, UpsertionRecord.namespace],
set_=dict(
# attr-defined type ignore
updated_at=insert_stmt.excluded.updated_at, # type: ignore
group_id=insert_stmt.excluded.group_id, # type: ignore
),
)
if self.dialect == "sqlite":
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
# Note: uses SQLite insert to make on_conflict_do_update work.
# This code needs to be generalized a bit to work with more dialects.
insert_stmt = sqlite_insert(UpsertionRecord).values(records_to_upsert)
stmt = insert_stmt.on_conflict_do_update( # type: ignore[attr-defined]
[UpsertionRecord.key, UpsertionRecord.namespace],
set_=dict(
# attr-defined type ignore
updated_at=insert_stmt.excluded.updated_at, # type: ignore
group_id=insert_stmt.excluded.group_id, # type: ignore
),
)
elif self.dialect == "postgresql":
from sqlalchemy.dialects.postgresql import insert as pg_insert
# Note: uses SQLite insert to make on_conflict_do_update work.
# This code needs to be generalized a bit to work with more dialects.
insert_stmt = pg_insert(UpsertionRecord).values(records_to_upsert)
stmt = insert_stmt.on_conflict_do_update( # type: ignore[attr-defined]
"uix_key_namespace", # Name of constraint
set_=dict(
# attr-defined type ignore
updated_at=insert_stmt.excluded.updated_at, # type: ignore
group_id=insert_stmt.excluded.group_id, # type: ignore
),
)
else:
raise NotImplementedError(f"Unsupported dialect {self.dialect}")
session.execute(stmt)
session.commit()

View File

@@ -62,6 +62,7 @@ from langchain.llms.mosaicml import MosaicML
from langchain.llms.nlpcloud import NLPCloud
from langchain.llms.octoai_endpoint import OctoAIEndpoint
from langchain.llms.ollama import Ollama
from langchain.llms.opaqueprompts import OpaquePrompts
from langchain.llms.openai import AzureOpenAI, OpenAI, OpenAIChat
from langchain.llms.openllm import OpenLLM
from langchain.llms.openlm import OpenLM
@@ -69,7 +70,6 @@ from langchain.llms.petals import Petals
from langchain.llms.pipelineai import PipelineAI
from langchain.llms.predibase import Predibase
from langchain.llms.predictionguard import PredictionGuard
from langchain.llms.promptguard import PromptGuard
from langchain.llms.promptlayer_openai import PromptLayerOpenAI, PromptLayerOpenAIChat
from langchain.llms.replicate import Replicate
from langchain.llms.rwkv import RWKV
@@ -142,7 +142,7 @@ __all__ = [
"PredictionGuard",
"PromptLayerOpenAI",
"PromptLayerOpenAIChat",
"PromptGuard",
"OpaquePrompts",
"RWKV",
"Replicate",
"SagemakerEndpoint",
@@ -207,7 +207,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
"petals": Petals,
"pipelineai": PipelineAI,
"predibase": Predibase,
"promptguard": PromptGuard,
"opaqueprompts": OpaquePrompts,
"replicate": Replicate,
"rwkv": RWKV,
"sagemaker_endpoint": SagemakerEndpoint,

View File

@@ -10,23 +10,23 @@ from langchain.utils import get_from_dict_or_env
logger = logging.getLogger(__name__)
class PromptGuard(LLM):
"""An LLM wrapper that uses PromptGuard to sanitize prompts.
class OpaquePrompts(LLM):
"""An LLM wrapper that uses OpaquePrompts to sanitize prompts.
Wraps another LLM and sanitizes prompts before passing it to the LLM, then
de-sanitizes the response.
To use, you should have the ``promptguard`` python package installed,
and the environment variable ``PROMPTGUARD_API_KEY`` set with
To use, you should have the ``opaqueprompts`` python package installed,
and the environment variable ``OPAQUEPROMPTS_API_KEY`` set with
your API key, or pass it as a named parameter to the constructor.
Example:
.. code-block:: python
from langchain.llms import PromptGuard
from langchain.llms import OpaquePrompts
from langchain.chat_models import ChatOpenAI
prompt_guard_llm = PromptGuard(base_llm=ChatOpenAI())
op_llm = OpaquePrompts(base_llm=ChatOpenAI())
"""
base_llm: BaseLanguageModel
@@ -39,29 +39,29 @@ class PromptGuard(LLM):
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validates that the PromptGuard API key and the Python package exist."""
"""Validates that the OpaquePrompts API key and the Python package exist."""
try:
import promptguard as pg
import opaqueprompts as op
except ImportError:
raise ImportError(
"Could not import the `promptguard` Python package, "
"please install it with `pip install promptguard`."
"Could not import the `opaqueprompts` Python package, "
"please install it with `pip install opaqueprompts`."
)
if pg.__package__ is None:
if op.__package__ is None:
raise ValueError(
"Could not properly import `promptguard`, "
"promptguard.__package__ is None."
"Could not properly import `opaqueprompts`, "
"opaqueprompts.__package__ is None."
)
api_key = get_from_dict_or_env(
values, "promptguard_api_key", "PROMPTGUARD_API_KEY", default=""
values, "opaqueprompts_api_key", "OPAQUEPROMPTS_API_KEY", default=""
)
if not api_key:
raise ValueError(
"Could not find PROMPTGUARD_API_KEY in the environment. "
"Please set it to your PromptGuard API key."
"You can get it by creating an account on the PromptGuard website: "
"https://promptguard.opaque.co/ ."
"Could not find OPAQUEPROMPTS_API_KEY in the environment. "
"Please set it to your OpaquePrompts API key."
"You can get it by creating an account on the OpaquePrompts website: "
"https://opaqueprompts.opaque.co/ ."
)
return values
@@ -83,14 +83,14 @@ class PromptGuard(LLM):
Example:
.. code-block:: python
response = prompt_guard_llm("Tell me a joke.")
response = op_llm("Tell me a joke.")
"""
import promptguard as pg
import opaqueprompts as op
_run_manager = run_manager or CallbackManagerForLLMRun.get_noop_manager()
# sanitize the prompt by replacing the sensitive information with a placeholder
sanitize_response: pg.SanitizeResponse = pg.sanitize([prompt])
sanitize_response: op.SanitizeResponse = op.sanitize([prompt])
sanitized_prompt_value_str = sanitize_response.sanitized_texts[0]
# TODO: Add in callbacks once child runs for LLMs are supported by LangSmith.
@@ -101,7 +101,7 @@ class PromptGuard(LLM):
)
# desanitize the response by restoring the original sensitive information
desanitize_response: pg.DesanitizeResponse = pg.desanitize(
desanitize_response: op.DesanitizeResponse = op.desanitize(
llm_response,
secure_context=sanitize_response.secure_context,
)
@@ -113,4 +113,4 @@ class PromptGuard(LLM):
This is an override of the base class method.
"""
return "promptguard"
return "opaqueprompts"

View File

@@ -13,7 +13,7 @@ def _replace_new_line(match: re.Match[str]) -> str:
value = re.sub(r"\n", r"\\n", value)
value = re.sub(r"\r", r"\\r", value)
value = re.sub(r"\t", r"\\t", value)
value = re.sub('"', r"\"", value)
value = re.sub(r'(?<!\\)"', r"\"", value)
return match.group(1) + value + match.group(3)

View File

@@ -150,7 +150,7 @@ class WebResearchRetriever(BaseRetriever):
return query.strip()
def search_tool(self, query: str, num_search_results: int = 1) -> List[dict]:
"""Returns num_serch_results pages per Google search."""
"""Returns num_search_results pages per Google search."""
query_clean = self.clean_search_query(query)
result = self.search.results(query_clean, num_search_results)
return result

View File

@@ -273,7 +273,11 @@ class ChildTool(BaseTool):
Add run_manager: Optional[AsyncCallbackManagerForToolRun] = None
to child implementations to enable tracing,
"""
raise NotImplementedError()
return await asyncio.get_running_loop().run_in_executor(
None,
partial(self._run, **kwargs),
*args,
)
def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:
# For backwards compatibility, if run_input is a string,
@@ -522,7 +526,10 @@ class Tool(BaseTool):
if new_argument_supported
else await self.coroutine(*args, **kwargs)
)
raise NotImplementedError("Tool does not support async")
else:
return await asyncio.get_running_loop().run_in_executor(
None, partial(self._run, run_manager=run_manager, **kwargs), *args
)
# TODO: this is for backwards compatibility, remove in future
def __init__(
@@ -634,7 +641,12 @@ class StructuredTool(BaseTool):
if new_argument_supported
else await self.coroutine(*args, **kwargs)
)
raise NotImplementedError("Tool does not support async")
return await asyncio.get_running_loop().run_in_executor(
None,
self._run,
partial(self._run, run_manager=run_manager, **kwargs),
*args,
)
@classmethod
def from_function(

View File

@@ -4,6 +4,7 @@ Other LangChain classes use **Utilities** to interact with third-part systems
and packages.
"""
from langchain.utilities.alpha_vantage import AlphaVantageAPIWrapper
from langchain.utilities.apify import ApifyWrapper
from langchain.utilities.arxiv import ArxivAPIWrapper
from langchain.utilities.awslambda import LambdaWrapper
from langchain.utilities.bash import BashProcess
@@ -38,6 +39,7 @@ from langchain.utilities.zapier import ZapierNLAWrapper
__all__ = [
"AlphaVantageAPIWrapper",
"ApifyWrapper",
"ArxivAPIWrapper",
"BashProcess",
"BibtexparserWrapper",

View File

@@ -0,0 +1,194 @@
from typing import Any, Callable, Dict, Optional
from langchain.document_loaders import ApifyDatasetLoader
from langchain.document_loaders.base import Document
from langchain.pydantic_v1 import BaseModel, root_validator
from langchain.utils import get_from_dict_or_env
class ApifyWrapper(BaseModel):
"""Wrapper around Apify.
To use, you should have the ``apify-client`` python package installed,
and the environment variable ``APIFY_API_TOKEN`` set with your API key, or pass
`apify_api_token` as a named parameter to the constructor.
"""
apify_client: Any
apify_client_async: Any
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate environment.
Validate that an Apify API token is set and the apify-client
Python package exists in the current environment.
"""
apify_api_token = get_from_dict_or_env(
values, "apify_api_token", "APIFY_API_TOKEN"
)
try:
from apify_client import ApifyClient, ApifyClientAsync
values["apify_client"] = ApifyClient(apify_api_token)
values["apify_client_async"] = ApifyClientAsync(apify_api_token)
except ImportError:
raise ValueError(
"Could not import apify-client Python package. "
"Please install it with `pip install apify-client`."
)
return values
def call_actor(
self,
actor_id: str,
run_input: Dict,
dataset_mapping_function: Callable[[Dict], Document],
*,
build: Optional[str] = None,
memory_mbytes: Optional[int] = None,
timeout_secs: Optional[int] = None,
) -> ApifyDatasetLoader:
"""Run an Actor on the Apify platform and wait for results to be ready.
Args:
actor_id (str): The ID or name of the Actor on the Apify platform.
run_input (Dict): The input object of the Actor that you're trying to run.
dataset_mapping_function (Callable): A function that takes a single
dictionary (an Apify dataset item) and converts it to an
instance of the Document class.
build (str, optional): Optionally specifies the actor build to run.
It can be either a build tag or build number.
memory_mbytes (int, optional): Optional memory limit for the run,
in megabytes.
timeout_secs (int, optional): Optional timeout for the run, in seconds.
Returns:
ApifyDatasetLoader: A loader that will fetch the records from the
Actor run's default dataset.
"""
actor_call = self.apify_client.actor(actor_id).call(
run_input=run_input,
build=build,
memory_mbytes=memory_mbytes,
timeout_secs=timeout_secs,
)
return ApifyDatasetLoader(
dataset_id=actor_call["defaultDatasetId"],
dataset_mapping_function=dataset_mapping_function,
)
async def acall_actor(
self,
actor_id: str,
run_input: Dict,
dataset_mapping_function: Callable[[Dict], Document],
*,
build: Optional[str] = None,
memory_mbytes: Optional[int] = None,
timeout_secs: Optional[int] = None,
) -> ApifyDatasetLoader:
"""Run an Actor on the Apify platform and wait for results to be ready.
Args:
actor_id (str): The ID or name of the Actor on the Apify platform.
run_input (Dict): The input object of the Actor that you're trying to run.
dataset_mapping_function (Callable): A function that takes a single
dictionary (an Apify dataset item) and converts it to
an instance of the Document class.
build (str, optional): Optionally specifies the actor build to run.
It can be either a build tag or build number.
memory_mbytes (int, optional): Optional memory limit for the run,
in megabytes.
timeout_secs (int, optional): Optional timeout for the run, in seconds.
Returns:
ApifyDatasetLoader: A loader that will fetch the records from the
Actor run's default dataset.
"""
actor_call = await self.apify_client_async.actor(actor_id).call(
run_input=run_input,
build=build,
memory_mbytes=memory_mbytes,
timeout_secs=timeout_secs,
)
return ApifyDatasetLoader(
dataset_id=actor_call["defaultDatasetId"],
dataset_mapping_function=dataset_mapping_function,
)
def call_actor_task(
self,
task_id: str,
task_input: Dict,
dataset_mapping_function: Callable[[Dict], Document],
*,
build: Optional[str] = None,
memory_mbytes: Optional[int] = None,
timeout_secs: Optional[int] = None,
) -> ApifyDatasetLoader:
"""Run a saved Actor task on Apify and wait for results to be ready.
Args:
task_id (str): The ID or name of the task on the Apify platform.
task_input (Dict): The input object of the task that you're trying to run.
Overrides the task's saved input.
dataset_mapping_function (Callable): A function that takes a single
dictionary (an Apify dataset item) and converts it to an
instance of the Document class.
build (str, optional): Optionally specifies the actor build to run.
It can be either a build tag or build number.
memory_mbytes (int, optional): Optional memory limit for the run,
in megabytes.
timeout_secs (int, optional): Optional timeout for the run, in seconds.
Returns:
ApifyDatasetLoader: A loader that will fetch the records from the
task run's default dataset.
"""
task_call = self.apify_client.task(task_id).call(
task_input=task_input,
build=build,
memory_mbytes=memory_mbytes,
timeout_secs=timeout_secs,
)
return ApifyDatasetLoader(
dataset_id=task_call["defaultDatasetId"],
dataset_mapping_function=dataset_mapping_function,
)
async def acall_actor_task(
self,
task_id: str,
task_input: Dict,
dataset_mapping_function: Callable[[Dict], Document],
*,
build: Optional[str] = None,
memory_mbytes: Optional[int] = None,
timeout_secs: Optional[int] = None,
) -> ApifyDatasetLoader:
"""Run a saved Actor task on Apify and wait for results to be ready.
Args:
task_id (str): The ID or name of the task on the Apify platform.
task_input (Dict): The input object of the task that you're trying to run.
Overrides the task's saved input.
dataset_mapping_function (Callable): A function that takes a single
dictionary (an Apify dataset item) and converts it to an
instance of the Document class.
build (str, optional): Optionally specifies the actor build to run.
It can be either a build tag or build number.
memory_mbytes (int, optional): Optional memory limit for the run,
in megabytes.
timeout_secs (int, optional): Optional timeout for the run, in seconds.
Returns:
ApifyDatasetLoader: A loader that will fetch the records from the
task run's default dataset.
"""
task_call = await self.apify_client_async.task(task_id).call(
task_input=task_input,
build=build,
memory_mbytes=memory_mbytes,
timeout_secs=timeout_secs,
)
return ApifyDatasetLoader(
dataset_id=task_call["defaultDatasetId"],
dataset_mapping_function=dataset_mapping_function,
)

View File

@@ -31,16 +31,16 @@ def sanitize(
The `secure_context` needs to be passed to the `desanitize` function.
"""
try:
import promptguard as pg
import opaqueprompts as op
except ImportError:
raise ImportError(
"Could not import the `promptguard` Python package, "
"please install it with `pip install promptguard`."
"Could not import the `opaqueprompts` Python package, "
"please install it with `pip install opaqueprompts`."
)
if isinstance(input, str):
# the input could be a string, so we sanitize the string
sanitize_response: pg.SanitizeResponse = pg.sanitize([input])
sanitize_response: op.SanitizeResponse = op.sanitize([input])
return {
"sanitized_input": sanitize_response.sanitized_texts[0],
"secure_context": sanitize_response.secure_context,
@@ -55,7 +55,7 @@ def sanitize(
values.append(input[key])
# sanitize the values
sanitize_values_response: pg.SanitizeResponse = pg.sanitize(values)
sanitize_values_response: op.SanitizeResponse = op.sanitize(values)
# reconstruct the dict with the sanitized values
sanitized_input_values = sanitize_values_response.sanitized_texts
@@ -85,13 +85,13 @@ def desanitize(sanitized_text: str, secure_context: bytes) -> str:
De-sanitized text.
"""
try:
import promptguard as pg
import opaqueprompts as op
except ImportError:
raise ImportError(
"Could not import the `promptguard` Python package, "
"please install it with `pip install promptguard`."
"Could not import the `opaqueprompts` Python package, "
"please install it with `pip install opaqueprompts`."
)
desanitize_response: pg.DesanitizeResponse = pg.desanitize(
desanitize_response: op.DesanitizeResponse = op.desanitize(
sanitized_text, secure_context
)
return desanitize_response.desanitized_text

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain"
version = "0.0.277"
version = "0.0.278"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"

View File

@@ -0,0 +1,27 @@
#!/bin/bash
#
# This script searches for lines starting with "import pydantic" or "from pydantic"
# in tracked files within a Git repository.
#
# Usage: ./scripts/check_pydantic.sh /path/to/repository
# Check if a path argument is provided
if [ $# -ne 1 ]; then
echo "Usage: $0 /path/to/repository"
exit 1
fi
repository_path="$1"
# Search for lines matching the pattern within the specified repository
result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic')
# Check if any matching lines were found
if [ -n "$result" ]; then
echo "ERROR: The following lines need to be updated:"
echo "$result"
echo "Please replace the code with an import from langchain.pydantic_v1."
echo "For example, replace 'from pydantic import BaseModel'"
echo "with 'from langchain.pydantic_v1 import BaseModel'"
exit 1
fi

View File

@@ -1,7 +1,7 @@
import langchain.utilities.promptguard as pgf
import langchain.utilities.opaqueprompts as op
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI
from langchain.llms.promptguard import PromptGuard
from langchain.llms.opaqueprompts import OpaquePrompts
from langchain.memory import ConversationBufferWindowMemory
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableMap
@@ -42,10 +42,10 @@ Question: ```{question}```
"""
def test_promptguard() -> None:
def test_opaqueprompts() -> None:
chain = LLMChain(
prompt=PromptTemplate.from_template(prompt_template),
llm=PromptGuard(llm=OpenAI()),
llm=OpaquePrompts(llm=OpenAI()),
memory=ConversationBufferWindowMemory(k=2),
)
@@ -58,11 +58,11 @@ def test_promptguard() -> None:
assert isinstance(output, str)
def test_promptguard_functions() -> None:
def test_opaqueprompts_functions() -> None:
prompt = (PromptTemplate.from_template(prompt_template),)
llm = OpenAI()
pg_chain = (
pgf.sanitize
op.sanitize
| RunnableMap(
{
"response": (lambda x: x["sanitized_input"]) # type: ignore
@@ -72,7 +72,7 @@ def test_promptguard_functions() -> None:
"secure_context": lambda x: x["secure_context"],
}
)
| (lambda x: pgf.desanitize(x["response"], x["secure_context"]))
| (lambda x: op.desanitize(x["response"], x["secure_context"]))
)
pg_chain.invoke(

View File

@@ -67,6 +67,34 @@ JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
}
```"""
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
{
"action": "Final Answer",
"action_input": "{"foo": "bar", "bar": "foo"}"
}
```"""
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
{
"action": "Final Answer",
"action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
}
```"""
JSON_WITH_PYTHON_DICT = """```json
{
"action": "Final Answer",
"action_input": {"foo": "bar", "bar": "foo"}
}
```"""
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON = """```json
{
"action": "Final Answer",
"action_input": "{\\"foo\\": \\"bar\\", \\"bar\\": \\"foo\\"}"
}
```"""
NO_TICKS = """{
"foo": "bar"
}"""
@@ -131,3 +159,27 @@ def test_parse_json_with_code_blocks() -> None:
"action": "Final Answer",
"action_input": '```bar\n<div id="1" class="value">\n\ttext\n</div>```',
}
TEST_CASES_ESCAPED_QUOTES = [
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
]
@pytest.mark.parametrize("json_string", TEST_CASES_ESCAPED_QUOTES)
def test_parse_nested_json_with_escaped_quotes(json_string: str) -> None:
parsed = parse_json_markdown(json_string)
assert parsed == {
"action": "Final Answer",
"action_input": '{"foo": "bar", "bar": "foo"}',
}
def test_parse_json_with_python_dict() -> None:
parsed = parse_json_markdown(JSON_WITH_PYTHON_DICT)
assert parsed == {
"action": "Final Answer",
"action_input": {"foo": "bar", "bar": "foo"},
}