mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-03 12:07:36 +00:00
Merge branch 'langchain-ai:master' into master
This commit is contained in:
@@ -76,6 +76,7 @@ lint format: PYTHON_FILES=.
|
||||
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/langchain --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
|
||||
|
||||
lint lint_diff:
|
||||
./scripts/check_pydantic.sh .
|
||||
poetry run ruff .
|
||||
poetry run black $(PYTHON_FILES) --check
|
||||
poetry run mypy $(PYTHON_FILES)
|
||||
|
@@ -120,6 +120,7 @@ class Run(BaseRunV2):
|
||||
|
||||
ChainRun.update_forward_refs()
|
||||
ToolRun.update_forward_refs()
|
||||
Run.update_forward_refs()
|
||||
|
||||
__all__ = [
|
||||
"BaseRun",
|
||||
|
@@ -11,7 +11,7 @@ from langchain.prompts.prompt import PromptTemplate
|
||||
DEFAULT_REFINE_PROMPT_TMPL = (
|
||||
"The original question is as follows: {question}\n"
|
||||
"We have provided an existing answer: {existing_answer}\n"
|
||||
"We have the opportunity to refine the existing answer"
|
||||
"We have the opportunity to refine the existing answer "
|
||||
"(only if needed) with some more context below.\n"
|
||||
"------------\n"
|
||||
"{context_str}\n"
|
||||
@@ -20,12 +20,10 @@ DEFAULT_REFINE_PROMPT_TMPL = (
|
||||
"answer the question. "
|
||||
"If the context isn't useful, return the original answer."
|
||||
)
|
||||
DEFAULT_REFINE_PROMPT = PromptTemplate(
|
||||
input_variables=["question", "existing_answer", "context_str"],
|
||||
template=DEFAULT_REFINE_PROMPT_TMPL,
|
||||
)
|
||||
DEFAULT_REFINE_PROMPT = PromptTemplate.from_template(DEFAULT_REFINE_PROMPT_TMPL)
|
||||
|
||||
refine_template = (
|
||||
"We have the opportunity to refine the existing answer"
|
||||
"We have the opportunity to refine the existing answer "
|
||||
"(only if needed) with some more context below.\n"
|
||||
"------------\n"
|
||||
"{context_str}\n"
|
||||
@@ -34,12 +32,9 @@ refine_template = (
|
||||
"answer the question. "
|
||||
"If the context isn't useful, return the original answer."
|
||||
)
|
||||
messages = [
|
||||
HumanMessagePromptTemplate.from_template("{question}"),
|
||||
AIMessagePromptTemplate.from_template("{existing_answer}"),
|
||||
HumanMessagePromptTemplate.from_template(refine_template),
|
||||
]
|
||||
CHAT_REFINE_PROMPT = ChatPromptTemplate.from_messages(messages)
|
||||
# Chat variant of the refine prompt: the question as a human turn, the existing
# answer as the AI turn, then the refine instructions as the final human turn.
# The last entry must be the ``refine_template`` variable, not a string literal
# of the same name, so that ``{context_str}`` is part of the prompt.
CHAT_REFINE_PROMPT = ChatPromptTemplate.from_messages(
    [("human", "{question}"), ("ai", "{existing_answer}"), ("human", refine_template)]
)
|
||||
REFINE_PROMPT_SELECTOR = ConditionalPromptSelector(
|
||||
default_prompt=DEFAULT_REFINE_PROMPT,
|
||||
conditionals=[(is_chat_model, CHAT_REFINE_PROMPT)],
|
||||
@@ -48,28 +43,25 @@ REFINE_PROMPT_SELECTOR = ConditionalPromptSelector(
|
||||
|
||||
DEFAULT_TEXT_QA_PROMPT_TMPL = (
|
||||
"Context information is below. \n"
|
||||
"---------------------\n"
|
||||
"{context_str}"
|
||||
"\n---------------------\n"
|
||||
"------------\n"
|
||||
"{context_str}\n"
|
||||
"------------\n"
|
||||
"Given the context information and not prior knowledge, "
|
||||
"answer the question: {question}\n"
|
||||
)
|
||||
DEFAULT_TEXT_QA_PROMPT = PromptTemplate(
|
||||
input_variables=["context_str", "question"], template=DEFAULT_TEXT_QA_PROMPT_TMPL
|
||||
)
|
||||
DEFAULT_TEXT_QA_PROMPT = PromptTemplate.from_template(DEFAULT_TEXT_QA_PROMPT_TMPL)
|
||||
|
||||
chat_qa_prompt_template = (
|
||||
"Context information is below. \n"
|
||||
"---------------------\n"
|
||||
"{context_str}"
|
||||
"\n---------------------\n"
|
||||
"Context information is below.\n"
|
||||
"------------\n"
|
||||
"{context_str}\n"
|
||||
"------------\n"
|
||||
"Given the context information and not prior knowledge, "
|
||||
"answer any questions"
|
||||
)
|
||||
messages = [
|
||||
SystemMessagePromptTemplate.from_template(chat_qa_prompt_template),
|
||||
HumanMessagePromptTemplate.from_template("{question}"),
|
||||
]
|
||||
CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
|
||||
CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(
|
||||
[("system", chat_qa_prompt_template), ("human", "{question}")]
|
||||
)
|
||||
QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector(
|
||||
default_prompt=DEFAULT_TEXT_QA_PROMPT,
|
||||
conditionals=[(is_chat_model, CHAT_QUESTION_PROMPT)],
|
||||
|
@@ -1,21 +1,16 @@
|
||||
# flake8: noqa
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
REFINE_PROMPT_TMPL = (
|
||||
"Your job is to produce a final summary\n"
|
||||
"We have provided an existing summary up to a certain point: {existing_answer}\n"
|
||||
"We have the opportunity to refine the existing summary"
|
||||
"(only if needed) with some more context below.\n"
|
||||
"------------\n"
|
||||
"{text}\n"
|
||||
"------------\n"
|
||||
"Given the new context, refine the original summary\n"
|
||||
"If the context isn't useful, return the original summary."
|
||||
)
|
||||
REFINE_PROMPT = PromptTemplate(
|
||||
input_variables=["existing_answer", "text"],
|
||||
template=REFINE_PROMPT_TMPL,
|
||||
)
|
||||
REFINE_PROMPT_TMPL = """\
|
||||
Your job is to produce a final summary.
|
||||
We have provided an existing summary up to a certain point: {existing_answer}
|
||||
We have the opportunity to refine the existing summary (only if needed) with some more context below.
|
||||
------------
|
||||
{text}
|
||||
------------
|
||||
Given the new context, refine the original summary.
|
||||
If the context isn't useful, return the original summary.\
|
||||
""" # noqa: E501
|
||||
REFINE_PROMPT = PromptTemplate.from_template(REFINE_PROMPT_TMPL)
|
||||
|
||||
|
||||
prompt_template = """Write a concise summary of the following:
|
||||
@@ -25,4 +20,4 @@ prompt_template = """Write a concise summary of the following:
|
||||
|
||||
|
||||
CONCISE SUMMARY:"""
|
||||
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
|
||||
PROMPT = PromptTemplate.from_template(prompt_template)
|
||||
|
@@ -332,9 +332,9 @@ def index(
|
||||
uids_to_delete = record_manager.list_keys(before=index_start_dt)
|
||||
|
||||
if uids_to_delete:
|
||||
# Then delete from vector store.
|
||||
vector_store.delete(uids_to_delete)
|
||||
# First delete from vector store.
|
||||
vector_store.delete(uids_to_delete)
|
||||
# Then delete from record manager.
|
||||
record_manager.delete_keys(uids_to_delete)
|
||||
num_deleted = len(uids_to_delete)
|
||||
|
||||
|
@@ -14,10 +14,12 @@ allow it to work with a variety of SQL as a backend.
|
||||
* Keys can be deleted.
|
||||
"""
|
||||
import contextlib
|
||||
import decimal
|
||||
import uuid
|
||||
from typing import Any, Dict, Generator, List, Optional, Sequence
|
||||
from typing import Any, Dict, Generator, List, Optional, Sequence, Union
|
||||
|
||||
from sqlalchemy import (
|
||||
URL,
|
||||
Column,
|
||||
Engine,
|
||||
Float,
|
||||
@@ -28,7 +30,6 @@ from sqlalchemy import (
|
||||
create_engine,
|
||||
text,
|
||||
)
|
||||
from sqlalchemy.dialects.sqlite import insert
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
@@ -77,7 +78,7 @@ class SQLRecordManager(RecordManager):
|
||||
namespace: str,
|
||||
*,
|
||||
engine: Optional[Engine] = None,
|
||||
db_url: Optional[str] = None,
|
||||
db_url: Union[None, str, URL] = None,
|
||||
engine_kwargs: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Initialize the SQLRecordManager.
|
||||
@@ -114,6 +115,7 @@ class SQLRecordManager(RecordManager):
|
||||
raise AssertionError("Something went wrong with configuration of engine.")
|
||||
|
||||
self.engine = _engine
|
||||
self.dialect = _engine.dialect.name
|
||||
self.session_factory = sessionmaker(bind=self.engine)
|
||||
|
||||
def create_schema(self) -> None:
|
||||
@@ -145,8 +147,16 @@ class SQLRecordManager(RecordManager):
|
||||
# 2440587.5 - constant represents the Julian day number for January 1, 1970
|
||||
# 86400.0 - constant represents the number of seconds
|
||||
# in a day (24 hours * 60 minutes * 60 seconds)
|
||||
query = text("SELECT (julianday('now') - 2440587.5) * 86400.0;")
|
||||
if self.dialect == "sqlite":
|
||||
query = text("SELECT (julianday('now') - 2440587.5) * 86400.0;")
|
||||
elif self.dialect == "postgresql":
|
||||
query = text("SELECT EXTRACT (EPOCH FROM CURRENT_TIMESTAMP);")
|
||||
else:
|
||||
raise NotImplementedError(f"Not implemented for dialect {self.dialect}")
|
||||
|
||||
dt = session.execute(query).scalar()
|
||||
if isinstance(dt, decimal.Decimal):
|
||||
dt = float(dt)
|
||||
if not isinstance(dt, float):
|
||||
raise AssertionError(f"Unexpected type for datetime: {type(dt)}")
|
||||
return dt
|
||||
@@ -192,17 +202,37 @@ class SQLRecordManager(RecordManager):
|
||||
]
|
||||
|
||||
with self._make_session() as session:
|
||||
# Note: uses SQLite insert to make on_conflict_do_update work.
|
||||
# This code needs to be generalized a bit to work with more dialects.
|
||||
insert_stmt = insert(UpsertionRecord).values(records_to_upsert)
|
||||
stmt = insert_stmt.on_conflict_do_update( # type: ignore[attr-defined]
|
||||
[UpsertionRecord.key, UpsertionRecord.namespace],
|
||||
set_=dict(
|
||||
# attr-defined type ignore
|
||||
updated_at=insert_stmt.excluded.updated_at, # type: ignore
|
||||
group_id=insert_stmt.excluded.group_id, # type: ignore
|
||||
),
|
||||
)
|
||||
if self.dialect == "sqlite":
|
||||
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
|
||||
|
||||
# Note: uses SQLite insert to make on_conflict_do_update work.
|
||||
# This code needs to be generalized a bit to work with more dialects.
|
||||
insert_stmt = sqlite_insert(UpsertionRecord).values(records_to_upsert)
|
||||
stmt = insert_stmt.on_conflict_do_update( # type: ignore[attr-defined]
|
||||
[UpsertionRecord.key, UpsertionRecord.namespace],
|
||||
set_=dict(
|
||||
# attr-defined type ignore
|
||||
updated_at=insert_stmt.excluded.updated_at, # type: ignore
|
||||
group_id=insert_stmt.excluded.group_id, # type: ignore
|
||||
),
|
||||
)
|
||||
elif self.dialect == "postgresql":
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
|
||||
# Note: uses PostgreSQL insert to make on_conflict_do_update work.
|
||||
# This code needs to be generalized a bit to work with more dialects.
|
||||
insert_stmt = pg_insert(UpsertionRecord).values(records_to_upsert)
|
||||
stmt = insert_stmt.on_conflict_do_update( # type: ignore[attr-defined]
|
||||
"uix_key_namespace", # Name of constraint
|
||||
set_=dict(
|
||||
# attr-defined type ignore
|
||||
updated_at=insert_stmt.excluded.updated_at, # type: ignore
|
||||
group_id=insert_stmt.excluded.group_id, # type: ignore
|
||||
),
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError(f"Unsupported dialect {self.dialect}")
|
||||
|
||||
session.execute(stmt)
|
||||
session.commit()
|
||||
|
||||
|
@@ -62,6 +62,7 @@ from langchain.llms.mosaicml import MosaicML
|
||||
from langchain.llms.nlpcloud import NLPCloud
|
||||
from langchain.llms.octoai_endpoint import OctoAIEndpoint
|
||||
from langchain.llms.ollama import Ollama
|
||||
from langchain.llms.opaqueprompts import OpaquePrompts
|
||||
from langchain.llms.openai import AzureOpenAI, OpenAI, OpenAIChat
|
||||
from langchain.llms.openllm import OpenLLM
|
||||
from langchain.llms.openlm import OpenLM
|
||||
@@ -69,7 +70,6 @@ from langchain.llms.petals import Petals
|
||||
from langchain.llms.pipelineai import PipelineAI
|
||||
from langchain.llms.predibase import Predibase
|
||||
from langchain.llms.predictionguard import PredictionGuard
|
||||
from langchain.llms.promptguard import PromptGuard
|
||||
from langchain.llms.promptlayer_openai import PromptLayerOpenAI, PromptLayerOpenAIChat
|
||||
from langchain.llms.replicate import Replicate
|
||||
from langchain.llms.rwkv import RWKV
|
||||
@@ -142,7 +142,7 @@ __all__ = [
|
||||
"PredictionGuard",
|
||||
"PromptLayerOpenAI",
|
||||
"PromptLayerOpenAIChat",
|
||||
"PromptGuard",
|
||||
"OpaquePrompts",
|
||||
"RWKV",
|
||||
"Replicate",
|
||||
"SagemakerEndpoint",
|
||||
@@ -207,7 +207,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
|
||||
"petals": Petals,
|
||||
"pipelineai": PipelineAI,
|
||||
"predibase": Predibase,
|
||||
"promptguard": PromptGuard,
|
||||
"opaqueprompts": OpaquePrompts,
|
||||
"replicate": Replicate,
|
||||
"rwkv": RWKV,
|
||||
"sagemaker_endpoint": SagemakerEndpoint,
|
||||
|
@@ -10,23 +10,23 @@ from langchain.utils import get_from_dict_or_env
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PromptGuard(LLM):
|
||||
"""An LLM wrapper that uses PromptGuard to sanitize prompts.
|
||||
class OpaquePrompts(LLM):
|
||||
"""An LLM wrapper that uses OpaquePrompts to sanitize prompts.
|
||||
|
||||
Wraps another LLM and sanitizes prompts before passing it to the LLM, then
|
||||
de-sanitizes the response.
|
||||
|
||||
To use, you should have the ``promptguard`` python package installed,
|
||||
and the environment variable ``PROMPTGUARD_API_KEY`` set with
|
||||
To use, you should have the ``opaqueprompts`` python package installed,
|
||||
and the environment variable ``OPAQUEPROMPTS_API_KEY`` set with
|
||||
your API key, or pass it as a named parameter to the constructor.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain.llms import PromptGuard
|
||||
from langchain.llms import OpaquePrompts
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
prompt_guard_llm = PromptGuard(base_llm=ChatOpenAI())
|
||||
op_llm = OpaquePrompts(base_llm=ChatOpenAI())
|
||||
"""
|
||||
|
||||
base_llm: BaseLanguageModel
|
||||
@@ -39,29 +39,29 @@ class PromptGuard(LLM):
|
||||
|
||||
@root_validator()
|
||||
def validate_environment(cls, values: Dict) -> Dict:
|
||||
"""Validates that the PromptGuard API key and the Python package exist."""
|
||||
"""Validates that the OpaquePrompts API key and the Python package exist."""
|
||||
try:
|
||||
import promptguard as pg
|
||||
import opaqueprompts as op
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import the `promptguard` Python package, "
|
||||
"please install it with `pip install promptguard`."
|
||||
"Could not import the `opaqueprompts` Python package, "
|
||||
"please install it with `pip install opaqueprompts`."
|
||||
)
|
||||
if pg.__package__ is None:
|
||||
if op.__package__ is None:
|
||||
raise ValueError(
|
||||
"Could not properly import `promptguard`, "
|
||||
"promptguard.__package__ is None."
|
||||
"Could not properly import `opaqueprompts`, "
|
||||
"opaqueprompts.__package__ is None."
|
||||
)
|
||||
|
||||
api_key = get_from_dict_or_env(
|
||||
values, "promptguard_api_key", "PROMPTGUARD_API_KEY", default=""
|
||||
values, "opaqueprompts_api_key", "OPAQUEPROMPTS_API_KEY", default=""
|
||||
)
|
||||
if not api_key:
|
||||
raise ValueError(
|
||||
"Could not find PROMPTGUARD_API_KEY in the environment. "
|
||||
"Please set it to your PromptGuard API key."
|
||||
"You can get it by creating an account on the PromptGuard website: "
|
||||
"https://promptguard.opaque.co/ ."
|
||||
"Could not find OPAQUEPROMPTS_API_KEY in the environment. "
|
||||
"Please set it to your OpaquePrompts API key."
|
||||
"You can get it by creating an account on the OpaquePrompts website: "
|
||||
"https://opaqueprompts.opaque.co/ ."
|
||||
)
|
||||
return values
|
||||
|
||||
@@ -83,14 +83,14 @@ class PromptGuard(LLM):
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
response = prompt_guard_llm("Tell me a joke.")
|
||||
response = op_llm("Tell me a joke.")
|
||||
"""
|
||||
import promptguard as pg
|
||||
import opaqueprompts as op
|
||||
|
||||
_run_manager = run_manager or CallbackManagerForLLMRun.get_noop_manager()
|
||||
|
||||
# sanitize the prompt by replacing the sensitive information with a placeholder
|
||||
sanitize_response: pg.SanitizeResponse = pg.sanitize([prompt])
|
||||
sanitize_response: op.SanitizeResponse = op.sanitize([prompt])
|
||||
sanitized_prompt_value_str = sanitize_response.sanitized_texts[0]
|
||||
|
||||
# TODO: Add in callbacks once child runs for LLMs are supported by LangSmith.
|
||||
@@ -101,7 +101,7 @@ class PromptGuard(LLM):
|
||||
)
|
||||
|
||||
# desanitize the response by restoring the original sensitive information
|
||||
desanitize_response: pg.DesanitizeResponse = pg.desanitize(
|
||||
desanitize_response: op.DesanitizeResponse = op.desanitize(
|
||||
llm_response,
|
||||
secure_context=sanitize_response.secure_context,
|
||||
)
|
||||
@@ -113,4 +113,4 @@ class PromptGuard(LLM):
|
||||
|
||||
This is an override of the base class method.
|
||||
"""
|
||||
return "promptguard"
|
||||
return "opaqueprompts"
|
@@ -13,7 +13,7 @@ def _replace_new_line(match: re.Match[str]) -> str:
|
||||
value = re.sub(r"\n", r"\\n", value)
|
||||
value = re.sub(r"\r", r"\\r", value)
|
||||
value = re.sub(r"\t", r"\\t", value)
|
||||
value = re.sub('"', r"\"", value)
|
||||
value = re.sub(r'(?<!\\)"', r"\"", value)
|
||||
|
||||
return match.group(1) + value + match.group(3)
|
||||
|
||||
|
@@ -150,7 +150,7 @@ class WebResearchRetriever(BaseRetriever):
|
||||
return query.strip()
|
||||
|
||||
def search_tool(self, query: str, num_search_results: int = 1) -> List[dict]:
|
||||
"""Returns num_serch_results pages per Google search."""
|
||||
"""Returns num_search_results pages per Google search."""
|
||||
query_clean = self.clean_search_query(query)
|
||||
result = self.search.results(query_clean, num_search_results)
|
||||
return result
|
||||
|
@@ -273,7 +273,11 @@ class ChildTool(BaseTool):
|
||||
Add run_manager: Optional[AsyncCallbackManagerForToolRun] = None
|
||||
to child implementations to enable tracing,
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
return await asyncio.get_running_loop().run_in_executor(
|
||||
None,
|
||||
partial(self._run, **kwargs),
|
||||
*args,
|
||||
)
|
||||
|
||||
def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:
|
||||
# For backwards compatibility, if run_input is a string,
|
||||
@@ -522,7 +526,10 @@ class Tool(BaseTool):
|
||||
if new_argument_supported
|
||||
else await self.coroutine(*args, **kwargs)
|
||||
)
|
||||
raise NotImplementedError("Tool does not support async")
|
||||
else:
|
||||
return await asyncio.get_running_loop().run_in_executor(
|
||||
None, partial(self._run, run_manager=run_manager, **kwargs), *args
|
||||
)
|
||||
|
||||
# TODO: this is for backwards compatibility, remove in future
|
||||
def __init__(
|
||||
@@ -634,7 +641,12 @@ class StructuredTool(BaseTool):
|
||||
if new_argument_supported
|
||||
else await self.coroutine(*args, **kwargs)
|
||||
)
|
||||
raise NotImplementedError("Tool does not support async")
|
||||
return await asyncio.get_running_loop().run_in_executor(
|
||||
None,
|
||||
self._run,
|
||||
partial(self._run, run_manager=run_manager, **kwargs),
|
||||
*args,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_function(
|
||||
|
@@ -4,6 +4,7 @@ Other LangChain classes use **Utilities** to interact with third-part systems
|
||||
and packages.
|
||||
"""
|
||||
from langchain.utilities.alpha_vantage import AlphaVantageAPIWrapper
|
||||
from langchain.utilities.apify import ApifyWrapper
|
||||
from langchain.utilities.arxiv import ArxivAPIWrapper
|
||||
from langchain.utilities.awslambda import LambdaWrapper
|
||||
from langchain.utilities.bash import BashProcess
|
||||
@@ -38,6 +39,7 @@ from langchain.utilities.zapier import ZapierNLAWrapper
|
||||
|
||||
__all__ = [
|
||||
"AlphaVantageAPIWrapper",
|
||||
"ApifyWrapper",
|
||||
"ArxivAPIWrapper",
|
||||
"BashProcess",
|
||||
"BibtexparserWrapper",
|
||||
|
194
libs/langchain/langchain/utilities/apify.py
Normal file
194
libs/langchain/langchain/utilities/apify.py
Normal file
@@ -0,0 +1,194 @@
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
from langchain.document_loaders import ApifyDatasetLoader
|
||||
from langchain.document_loaders.base import Document
|
||||
from langchain.pydantic_v1 import BaseModel, root_validator
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class ApifyWrapper(BaseModel):
    """Wrapper around Apify.

    To use, you should have the ``apify-client`` python package installed,
    and the environment variable ``APIFY_API_TOKEN`` set with your API key, or pass
    `apify_api_token` as a named parameter to the constructor.
    """

    # Synchronous and asynchronous Apify clients. Both are created by
    # ``validate_environment`` from the resolved API token.
    apify_client: Any
    apify_client_async: Any

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate environment.

        Validate that an Apify API token is set and the apify-client
        Python package exists in the current environment.

        Args:
            values (Dict): The field values being validated.

        Returns:
            Dict: The same ``values`` with ``apify_client`` and
                ``apify_client_async`` populated.

        Raises:
            ValueError: If the ``apify-client`` package cannot be imported.
        """
        apify_api_token = get_from_dict_or_env(
            values, "apify_api_token", "APIFY_API_TOKEN"
        )

        # Only the import is guarded, so that nothing but a missing package is
        # reported as "could not import"; the original error is kept as the cause.
        try:
            from apify_client import ApifyClient, ApifyClientAsync
        except ImportError as exc:
            raise ValueError(
                "Could not import apify-client Python package. "
                "Please install it with `pip install apify-client`."
            ) from exc

        values["apify_client"] = ApifyClient(apify_api_token)
        values["apify_client_async"] = ApifyClientAsync(apify_api_token)

        return values

    def call_actor(
        self,
        actor_id: str,
        run_input: Dict,
        dataset_mapping_function: Callable[[Dict], Document],
        *,
        build: Optional[str] = None,
        memory_mbytes: Optional[int] = None,
        timeout_secs: Optional[int] = None,
    ) -> ApifyDatasetLoader:
        """Run an Actor on the Apify platform and wait for results to be ready.

        Args:
            actor_id (str): The ID or name of the Actor on the Apify platform.
            run_input (Dict): The input object of the Actor that you're trying to run.
            dataset_mapping_function (Callable): A function that takes a single
                dictionary (an Apify dataset item) and converts it to an
                instance of the Document class.
            build (str, optional): Optionally specifies the actor build to run.
                It can be either a build tag or build number.
            memory_mbytes (int, optional): Optional memory limit for the run,
                in megabytes.
            timeout_secs (int, optional): Optional timeout for the run, in seconds.

        Returns:
            ApifyDatasetLoader: A loader that will fetch the records from the
                Actor run's default dataset.
        """
        actor_call = self.apify_client.actor(actor_id).call(
            run_input=run_input,
            build=build,
            memory_mbytes=memory_mbytes,
            timeout_secs=timeout_secs,
        )

        # The loader reads from the default dataset reported by the run.
        return ApifyDatasetLoader(
            dataset_id=actor_call["defaultDatasetId"],
            dataset_mapping_function=dataset_mapping_function,
        )

    async def acall_actor(
        self,
        actor_id: str,
        run_input: Dict,
        dataset_mapping_function: Callable[[Dict], Document],
        *,
        build: Optional[str] = None,
        memory_mbytes: Optional[int] = None,
        timeout_secs: Optional[int] = None,
    ) -> ApifyDatasetLoader:
        """Asynchronously run an Actor on Apify and wait for results to be ready.

        Same as ``call_actor`` but awaits the call on the async Apify client.

        Args:
            actor_id (str): The ID or name of the Actor on the Apify platform.
            run_input (Dict): The input object of the Actor that you're trying to run.
            dataset_mapping_function (Callable): A function that takes a single
                dictionary (an Apify dataset item) and converts it to
                an instance of the Document class.
            build (str, optional): Optionally specifies the actor build to run.
                It can be either a build tag or build number.
            memory_mbytes (int, optional): Optional memory limit for the run,
                in megabytes.
            timeout_secs (int, optional): Optional timeout for the run, in seconds.

        Returns:
            ApifyDatasetLoader: A loader that will fetch the records from the
                Actor run's default dataset.
        """
        actor_call = await self.apify_client_async.actor(actor_id).call(
            run_input=run_input,
            build=build,
            memory_mbytes=memory_mbytes,
            timeout_secs=timeout_secs,
        )

        return ApifyDatasetLoader(
            dataset_id=actor_call["defaultDatasetId"],
            dataset_mapping_function=dataset_mapping_function,
        )

    def call_actor_task(
        self,
        task_id: str,
        task_input: Dict,
        dataset_mapping_function: Callable[[Dict], Document],
        *,
        build: Optional[str] = None,
        memory_mbytes: Optional[int] = None,
        timeout_secs: Optional[int] = None,
    ) -> ApifyDatasetLoader:
        """Run a saved Actor task on Apify and wait for results to be ready.

        Args:
            task_id (str): The ID or name of the task on the Apify platform.
            task_input (Dict): The input object of the task that you're trying to run.
                Overrides the task's saved input.
            dataset_mapping_function (Callable): A function that takes a single
                dictionary (an Apify dataset item) and converts it to an
                instance of the Document class.
            build (str, optional): Optionally specifies the actor build to run.
                It can be either a build tag or build number.
            memory_mbytes (int, optional): Optional memory limit for the run,
                in megabytes.
            timeout_secs (int, optional): Optional timeout for the run, in seconds.

        Returns:
            ApifyDatasetLoader: A loader that will fetch the records from the
                task run's default dataset.
        """
        task_call = self.apify_client.task(task_id).call(
            task_input=task_input,
            build=build,
            memory_mbytes=memory_mbytes,
            timeout_secs=timeout_secs,
        )

        return ApifyDatasetLoader(
            dataset_id=task_call["defaultDatasetId"],
            dataset_mapping_function=dataset_mapping_function,
        )

    async def acall_actor_task(
        self,
        task_id: str,
        task_input: Dict,
        dataset_mapping_function: Callable[[Dict], Document],
        *,
        build: Optional[str] = None,
        memory_mbytes: Optional[int] = None,
        timeout_secs: Optional[int] = None,
    ) -> ApifyDatasetLoader:
        """Asynchronously run a saved Actor task and wait for results to be ready.

        Same as ``call_actor_task`` but awaits the call on the async Apify client.

        Args:
            task_id (str): The ID or name of the task on the Apify platform.
            task_input (Dict): The input object of the task that you're trying to run.
                Overrides the task's saved input.
            dataset_mapping_function (Callable): A function that takes a single
                dictionary (an Apify dataset item) and converts it to an
                instance of the Document class.
            build (str, optional): Optionally specifies the actor build to run.
                It can be either a build tag or build number.
            memory_mbytes (int, optional): Optional memory limit for the run,
                in megabytes.
            timeout_secs (int, optional): Optional timeout for the run, in seconds.

        Returns:
            ApifyDatasetLoader: A loader that will fetch the records from the
                task run's default dataset.
        """
        task_call = await self.apify_client_async.task(task_id).call(
            task_input=task_input,
            build=build,
            memory_mbytes=memory_mbytes,
            timeout_secs=timeout_secs,
        )

        return ApifyDatasetLoader(
            dataset_id=task_call["defaultDatasetId"],
            dataset_mapping_function=dataset_mapping_function,
        )
|
@@ -31,16 +31,16 @@ def sanitize(
|
||||
The `secure_context` needs to be passed to the `desanitize` function.
|
||||
"""
|
||||
try:
|
||||
import promptguard as pg
|
||||
import opaqueprompts as op
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import the `promptguard` Python package, "
|
||||
"please install it with `pip install promptguard`."
|
||||
"Could not import the `opaqueprompts` Python package, "
|
||||
"please install it with `pip install opaqueprompts`."
|
||||
)
|
||||
|
||||
if isinstance(input, str):
|
||||
# the input could be a string, so we sanitize the string
|
||||
sanitize_response: pg.SanitizeResponse = pg.sanitize([input])
|
||||
sanitize_response: op.SanitizeResponse = op.sanitize([input])
|
||||
return {
|
||||
"sanitized_input": sanitize_response.sanitized_texts[0],
|
||||
"secure_context": sanitize_response.secure_context,
|
||||
@@ -55,7 +55,7 @@ def sanitize(
|
||||
values.append(input[key])
|
||||
|
||||
# sanitize the values
|
||||
sanitize_values_response: pg.SanitizeResponse = pg.sanitize(values)
|
||||
sanitize_values_response: op.SanitizeResponse = op.sanitize(values)
|
||||
|
||||
# reconstruct the dict with the sanitized values
|
||||
sanitized_input_values = sanitize_values_response.sanitized_texts
|
||||
@@ -85,13 +85,13 @@ def desanitize(sanitized_text: str, secure_context: bytes) -> str:
|
||||
De-sanitized text.
|
||||
"""
|
||||
try:
|
||||
import promptguard as pg
|
||||
import opaqueprompts as op
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import the `promptguard` Python package, "
|
||||
"please install it with `pip install promptguard`."
|
||||
"Could not import the `opaqueprompts` Python package, "
|
||||
"please install it with `pip install opaqueprompts`."
|
||||
)
|
||||
desanitize_response: pg.DesanitizeResponse = pg.desanitize(
|
||||
desanitize_response: op.DesanitizeResponse = op.desanitize(
|
||||
sanitized_text, secure_context
|
||||
)
|
||||
return desanitize_response.desanitized_text
|
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "langchain"
|
||||
version = "0.0.277"
|
||||
version = "0.0.278"
|
||||
description = "Building applications with LLMs through composability"
|
||||
authors = []
|
||||
license = "MIT"
|
||||
|
27
libs/langchain/scripts/check_pydantic.sh
Executable file
27
libs/langchain/scripts/check_pydantic.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#
# This script searches for lines starting with "import pydantic" or "from pydantic"
# in tracked files within a Git repository.
#
# Usage: ./scripts/check_pydantic.sh /path/to/repository
#
# Exit status:
#   0 - no offending import was found
#   1 - wrong usage, git grep failed, or at least one offending import was found

# Check if a path argument is provided
if [ $# -ne 1 ]; then
  echo "Usage: $0 /path/to/repository"
  exit 1
fi

repository_path="$1"

# Search for lines matching the pattern within the specified repository.
# git grep exits 0 when something matched and 1 when nothing matched; any other
# status means the search itself failed (for example, the path is not inside a
# Git work tree). Without this check a failed search would look like a clean run.
result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic')
status=$?
if [ "$status" -gt 1 ]; then
  echo "ERROR: git grep failed in '$repository_path' (exit status $status)." >&2
  exit 1
fi

# Check if any matching lines were found
if [ -n "$result" ]; then
  echo "ERROR: The following lines need to be updated:"
  echo "$result"
  echo "Please replace the code with an import from langchain.pydantic_v1."
  echo "For example, replace 'from pydantic import BaseModel'"
  echo "with 'from langchain.pydantic_v1 import BaseModel'"
  exit 1
fi
|
@@ -1,7 +1,7 @@
|
||||
import langchain.utilities.promptguard as pgf
|
||||
import langchain.utilities.opaqueprompts as op
|
||||
from langchain import LLMChain, PromptTemplate
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.llms.promptguard import PromptGuard
|
||||
from langchain.llms.opaqueprompts import OpaquePrompts
|
||||
from langchain.memory import ConversationBufferWindowMemory
|
||||
from langchain.schema.output_parser import StrOutputParser
|
||||
from langchain.schema.runnable import RunnableMap
|
||||
@@ -42,10 +42,10 @@ Question: ```{question}```
|
||||
"""
|
||||
|
||||
|
||||
def test_promptguard() -> None:
|
||||
def test_opaqueprompts() -> None:
|
||||
chain = LLMChain(
|
||||
prompt=PromptTemplate.from_template(prompt_template),
|
||||
llm=PromptGuard(llm=OpenAI()),
|
||||
llm=OpaquePrompts(llm=OpenAI()),
|
||||
memory=ConversationBufferWindowMemory(k=2),
|
||||
)
|
||||
|
||||
@@ -58,11 +58,11 @@ def test_promptguard() -> None:
|
||||
assert isinstance(output, str)
|
||||
|
||||
|
||||
def test_promptguard_functions() -> None:
|
||||
def test_opaqueprompts_functions() -> None:
|
||||
prompt = (PromptTemplate.from_template(prompt_template),)
|
||||
llm = OpenAI()
|
||||
pg_chain = (
|
||||
pgf.sanitize
|
||||
op.sanitize
|
||||
| RunnableMap(
|
||||
{
|
||||
"response": (lambda x: x["sanitized_input"]) # type: ignore
|
||||
@@ -72,7 +72,7 @@ def test_promptguard_functions() -> None:
|
||||
"secure_context": lambda x: x["secure_context"],
|
||||
}
|
||||
)
|
||||
| (lambda x: pgf.desanitize(x["response"], x["secure_context"]))
|
||||
| (lambda x: op.desanitize(x["response"], x["secure_context"]))
|
||||
)
|
||||
|
||||
pg_chain.invoke(
|
@@ -67,6 +67,34 @@ JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{"foo": "bar", "bar": "foo"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_PYTHON_DICT = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": {"foo": "bar", "bar": "foo"}
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{\\"foo\\": \\"bar\\", \\"bar\\": \\"foo\\"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
NO_TICKS = """{
|
||||
"foo": "bar"
|
||||
}"""
|
||||
@@ -131,3 +159,27 @@ def test_parse_json_with_code_blocks() -> None:
|
||||
"action": "Final Answer",
|
||||
"action_input": '```bar\n<div id="1" class="value">\n\ttext\n</div>```',
|
||||
}
|
||||
|
||||
|
||||
TEST_CASES_ESCAPED_QUOTES = [
|
||||
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
|
||||
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
|
||||
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("json_string", TEST_CASES_ESCAPED_QUOTES)
def test_parse_nested_json_with_escaped_quotes(json_string: str) -> None:
    """Check that nested JSON passed as a string value parses to one result.

    Every case in ``TEST_CASES_ESCAPED_QUOTES`` must yield the same dict, with
    ``action_input`` kept as the inner JSON text.
    """
    expected = {
        "action": "Final Answer",
        "action_input": '{"foo": "bar", "bar": "foo"}',
    }
    result = parse_json_markdown(json_string)
    assert result == expected
|
||||
|
||||
|
||||
def test_parse_json_with_python_dict() -> None:
    """Check that a dict-valued ``action_input`` comes back as a nested dict."""
    expected = {
        "action": "Final Answer",
        "action_input": {"foo": "bar", "bar": "foo"},
    }
    result = parse_json_markdown(JSON_WITH_PYTHON_DICT)
    assert result == expected
|
||||
|
Reference in New Issue
Block a user