diff --git a/libs/langchain/langchain/agents/agent_toolkits/playwright/toolkit.py b/libs/langchain/langchain/agents/agent_toolkits/playwright/toolkit.py index 8d12d90e54f..44e8ea5cb76 100644 --- a/libs/langchain/langchain/agents/agent_toolkits/playwright/toolkit.py +++ b/libs/langchain/langchain/agents/agent_toolkits/playwright/toolkit.py @@ -31,7 +31,32 @@ else: class PlayWrightBrowserToolkit(BaseToolkit): - """Toolkit for PlayWright browser tools.""" + """Toolkit for PlayWright browser tools. + + **Security Note**: This toolkit provides code to control a web-browser. + + Be careful if exposing this toolkit to end-users. The tools in the toolkit + are capable of navigating to arbitrary webpages, clicking on arbitrary + elements, and extracting arbitrary text and hyperlinks from webpages. + + Specifically, by default this toolkit allows navigating to: + + - Any URL (including any internal network URLs) + - And local files + + If exposing to end-users, consider limiting network access to the + server that hosts the agent; in addition, it is advised + to create a custom NavigateTool with an args_schema that limits the URLs + that can be navigated to (e.g., only allow navigating to URLs that + start with a particular prefix). + + Remember to scope permissions to the minimal permissions necessary for + the application. If the default tool selection is not appropriate for + the application, consider creating a custom toolkit with the appropriate + tools. + + See https://python.langchain.com/docs/security for more information. 
+ """ sync_browser: Optional["SyncBrowser"] = None async_browser: Optional["AsyncBrowser"] = None diff --git a/libs/langchain/langchain/tools/playwright/navigate.py b/libs/langchain/langchain/tools/playwright/navigate.py index ac91f1f2418..288efe0a8e0 100644 --- a/libs/langchain/langchain/tools/playwright/navigate.py +++ b/libs/langchain/langchain/tools/playwright/navigate.py @@ -1,12 +1,13 @@ from __future__ import annotations from typing import Optional, Type +from urllib.parse import urlparse from langchain.callbacks.manager import ( AsyncCallbackManagerForToolRun, CallbackManagerForToolRun, ) -from langchain.pydantic_v1 import BaseModel, Field +from langchain.pydantic_v1 import BaseModel, Field, validator from langchain.tools.playwright.base import BaseBrowserTool from langchain.tools.playwright.utils import ( aget_current_page, @@ -19,9 +20,34 @@ class NavigateToolInput(BaseModel): url: str = Field(..., description="url to navigate to") + @validator("url") + def validate_url_scheme(cls, url: str) -> str: + """Check that the URL scheme is valid.""" + parsed_url = urlparse(url) + if parsed_url.scheme not in ("http", "https"): + raise ValueError("URL scheme must be 'http' or 'https'") + return url + class NavigateTool(BaseBrowserTool): - """Tool for navigating a browser to a URL.""" + """Tool for navigating a browser to a URL. + + **Security Note**: This tool provides code to control web-browser navigation. + + This tool can navigate to any URL, including internal network URLs, and + URLs exposed on the server itself. + + However, if exposing this tool to end-users, consider limiting network + access to the server that hosts the agent. + + By default, the URL scheme has been limited to 'http' and 'https' to + prevent navigation to local file system URLs (or other schemes). + + If access to the local file system is required, consider creating a custom + tool or providing a custom args_schema that allows the desired URL schemes. 
+ + See https://python.langchain.com/docs/security for more information. + """ name: str = "navigate_browser" description: str = "Navigate a browser to the specified URL"