Updating docstrings in utilities (#8411)

Updating docstrings on utility packages
 @baskaryan
This commit is contained in:
Gordon Clark 2023-08-01 07:34:53 +08:00 committed by GitHub
parent bca0749a11
commit 64d0a0fcc0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 149 additions and 41 deletions

View File

@ -21,15 +21,30 @@ class ArxivAPIWrapper(BaseModel):
It limits the Document content by doc_content_chars_max. It limits the Document content by doc_content_chars_max.
Set doc_content_chars_max=None if you don't want to limit the content size. Set doc_content_chars_max=None if you don't want to limit the content size.
Parameters: Args:
top_k_results: number of the top-scored document used for the arxiv tool top_k_results: number of the top-scored document used for the arxiv tool
ARXIV_MAX_QUERY_LENGTH: the cut limit on the query used for the arxiv tool. ARXIV_MAX_QUERY_LENGTH: the cut limit on the query used for the arxiv tool.
load_max_docs: a limit to the number of loaded documents load_max_docs: a limit to the number of loaded documents
load_all_available_meta: load_all_available_meta:
if True: the `metadata` of the loaded Documents gets all available meta info if True: the `metadata` of the loaded Documents contains all available
(see https://lukasschwab.me/arxiv.py/index.html#Result), meta info (see https://lukasschwab.me/arxiv.py/index.html#Result),
if False: the `metadata` gets only the most informative fields. if False: the `metadata` contains only the published date, title,
authors and summary.
doc_content_chars_max: an optional cut limit for the length of a document's
content
Example:
.. code-block:: python
from langchain.utilities.arxiv import ArxivAPIWrapper
arxiv = ArxivAPIWrapper(
top_k_results = 3,
ARXIV_MAX_QUERY_LENGTH = 300,
load_max_docs = 3,
load_all_available_meta = False,
doc_content_chars_max = 40000
)
arxiv.run("tree of thought llm")
""" """
arxiv_search: Any #: :meta private: arxiv_search: Any #: :meta private:
@ -62,11 +77,17 @@ class ArxivAPIWrapper(BaseModel):
def run(self, query: str) -> str: def run(self, query: str) -> str:
""" """
Run Arxiv search and get the article meta information. Performs an arxiv search and returns a single string
See https://lukasschwab.me/arxiv.py/index.html#Search with the publish date, title, authors, and summary
See https://lukasschwab.me/arxiv.py/index.html#Result for each article separated by two newlines.
It uses only the most informative fields of article meta information.
""" If an error occurs or no documents are found, error text
is returned instead. Wrapper for
https://lukasschwab.me/arxiv.py/index.html#Search
Args:
query: a plaintext search query
""" # noqa: E501
try: try:
results = self.arxiv_search( # type: ignore results = self.arxiv_search( # type: ignore
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
@ -74,7 +95,8 @@ class ArxivAPIWrapper(BaseModel):
except self.arxiv_exceptions as ex: except self.arxiv_exceptions as ex:
return f"Arxiv exception: {ex}" return f"Arxiv exception: {ex}"
docs = [ docs = [
f"Published: {result.updated.date()}\nTitle: {result.title}\n" f"Published: {result.updated.date()}\n"
f"Title: {result.title}\n"
f"Authors: {', '.join(a.name for a in result.authors)}\n" f"Authors: {', '.join(a.name for a in result.authors)}\n"
f"Summary: {result.summary}" f"Summary: {result.summary}"
for result in results for result in results
@ -91,7 +113,12 @@ class ArxivAPIWrapper(BaseModel):
Returns: a list of documents with the document.page_content in text format Returns: a list of documents with the document.page_content in text format
""" Performs an arxiv search, downloads the top k results as PDFs, loads
them as Documents, and returns them in a List.
Args:
query: a plaintext search query
""" # noqa: E501
try: try:
import fitz import fitz
except ImportError: except ImportError:

View File

@ -7,19 +7,27 @@ from pydantic import BaseModel, Extra, root_validator
class LambdaWrapper(BaseModel): class LambdaWrapper(BaseModel):
"""Wrapper for AWS Lambda SDK. """Wrapper for AWS Lambda SDK.
To use, you should have the ``boto3`` package installed
and a lambda function built from the AWS Console or
CLI. Set up your AWS credentials with ``aws configure``
Docs for using: Example:
.. code-block:: bash
1. pip install boto3 pip install boto3
2. Create a lambda function using the AWS Console or CLI
3. Run `aws configure` and enter your AWS credentials aws configure
""" """
lambda_client: Any #: :meta private: lambda_client: Any #: :meta private:
"""The configured boto3 client"""
function_name: Optional[str] = None function_name: Optional[str] = None
"""The name of your lambda function"""
awslambda_tool_name: Optional[str] = None awslambda_tool_name: Optional[str] = None
"""If passing to an agent as a tool, the tool name"""
awslambda_tool_description: Optional[str] = None awslambda_tool_description: Optional[str] = None
"""If passing to an agent as a tool, the description"""
class Config: class Config:
"""Configuration for this pydantic object.""" """Configuration for this pydantic object."""
@ -44,7 +52,15 @@ class LambdaWrapper(BaseModel):
return values return values
def run(self, query: str) -> str: def run(self, query: str) -> str:
"""Invoke Lambda function and parse result.""" """
Invokes the lambda function and returns the
result.
Args:
query: an input to be passed to the lambda
function as the ``body`` of a JSON
object.
""" # noqa: E501
res = self.lambda_client.invoke( res = self.lambda_client.invoke(
FunctionName=self.function_name, FunctionName=self.function_name,
InvocationType="RequestResponse", InvocationType="RequestResponse",

View File

@ -11,23 +11,36 @@ if TYPE_CHECKING:
import pexpect import pexpect
def _lazy_import_pexpect() -> pexpect:
"""Import pexpect only when needed."""
if platform.system() == "Windows":
raise ValueError("Persistent bash processes are not yet supported on Windows.")
try:
import pexpect
except ImportError:
raise ImportError(
"pexpect required for persistent bash processes."
" To install, run `pip install pexpect`."
)
return pexpect
class BashProcess: class BashProcess:
"""Executes bash commands and returns the output.""" """
Wrapper class for starting subprocesses.
Uses the Python built-in subprocess.run()
Persistent processes are **not** available
on Windows systems, as pexpect makes use of
Unix pseudoterminals (ptys). MacOS and Linux
are okay.
Example:
.. code-block:: python
from langchain.utilities.bash import BashProcess
bash = BashProcess(
strip_newlines = False,
return_err_output = False,
persistent = False
)
bash.run('echo \'hello world\'')
"""
strip_newlines: bool = False
"""Whether or not to run .strip() on the output"""
return_err_output: bool = False
"""Whether or not to return the output of a failed
command, or just the error message and stacktrace"""
persistent: bool = False
"""Whether or not to spawn a persistent session
NOTE: Unavailable for Windows environments"""
def __init__( def __init__(
self, self,
@ -35,20 +48,47 @@ class BashProcess:
return_err_output: bool = False, return_err_output: bool = False,
persistent: bool = False, persistent: bool = False,
): ):
"""Initialize with stripping newlines.""" """
Initializes with default settings
"""
self.strip_newlines = strip_newlines self.strip_newlines = strip_newlines
self.return_err_output = return_err_output self.return_err_output = return_err_output
self.prompt = "" self.prompt = ""
self.process = None self.process = None
if persistent: if persistent:
self.prompt = str(uuid4()) self.prompt = str(uuid4())
self.process = self._initialize_persistent_process(self.prompt) self.process = self._initialize_persistent_process(self, self.prompt)
@staticmethod @staticmethod
def _initialize_persistent_process(prompt: str) -> pexpect.spawn: def _lazy_import_pexpect() -> pexpect:
"""Import pexpect only when needed."""
if platform.system() == "Windows":
raise ValueError(
"Persistent bash processes are not yet supported on Windows."
)
try:
import pexpect
except ImportError:
raise ImportError(
"pexpect required for persistent bash processes."
" To install, run `pip install pexpect`."
)
return pexpect
@staticmethod
def _initialize_persistent_process(self: BashProcess, prompt: str) -> pexpect.spawn:
# Start bash in a clean environment # Start bash in a clean environment
# Doesn't work on windows # Doesn't work on windows
pexpect = _lazy_import_pexpect() """
Initializes a persistent bash setting in a
clean environment.
NOTE: Unavailable on Windows
Args:
prompt(str): the unique prompt string for the persistent session
""" # noqa: E501
pexpect = self._lazy_import_pexpect()
process = pexpect.spawn( process = pexpect.spawn(
"env", ["-i", "bash", "--norc", "--noprofile"], encoding="utf-8" "env", ["-i", "bash", "--norc", "--noprofile"], encoding="utf-8"
) )
@ -59,7 +99,14 @@ class BashProcess:
return process return process
def run(self, commands: Union[str, List[str]]) -> str: def run(self, commands: Union[str, List[str]]) -> str:
"""Run commands and return final output.""" """
Run commands in either an existing persistent
subprocess or in a new subprocess environment.
Args:
commands(Union[str, List[str]]): a command or list of commands to
execute in the session
""" # noqa: E501
if isinstance(commands, str): if isinstance(commands, str):
commands = [commands] commands = [commands]
commands = ";".join(commands) commands = ";".join(commands)
@ -71,7 +118,13 @@ class BashProcess:
return self._run(commands) return self._run(commands)
def _run(self, command: str) -> str: def _run(self, command: str) -> str:
"""Run commands and return final output.""" """
Runs a command in a subprocess and returns
the output.
Args:
command: The command to run
""" # noqa: E501
try: try:
output = subprocess.run( output = subprocess.run(
command, command,
@ -89,14 +142,26 @@ class BashProcess:
return output return output
def process_output(self, output: str, command: str) -> str: def process_output(self, output: str, command: str) -> str:
# Remove the command from the output using a regular expression """
Uses regex to remove the command from the output
Args:
output: a process' output string
command: the executed command
""" # noqa: E501
pattern = re.escape(command) + r"\s*\n" pattern = re.escape(command) + r"\s*\n"
output = re.sub(pattern, "", output, count=1) output = re.sub(pattern, "", output, count=1)
return output.strip() return output.strip()
def _run_persistent(self, command: str) -> str: def _run_persistent(self, command: str) -> str:
"""Run commands and return final output.""" """
pexpect = _lazy_import_pexpect() Runs commands in a persistent environment
and returns the output.
Args:
command: the command to execute
""" # noqa: E501
pexpect = self._lazy_import_pexpect()
if self.process is None: if self.process is None:
raise ValueError("Process not initialized") raise ValueError("Process not initialized")
self.process.sendline(command) self.process.sendline(command)