MultiOn client toolkit (#8110)

Addition of the MultiOn Client Agent Toolkit.
Dependencies: the `multion` pip package.
This PR consists of the following:
- MultiOn utility, tools, and integration with the agent.
- A sample Jupyter notebook.
Request @hwchase17, @hinthornw

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
Karthik Raja A
2023-07-22 20:49:01 +05:30
committed by GitHub
parent aa0e69bc98
commit 8b08687fc4
6 changed files with 314 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""MultiOn Toolkit."""

View File

@@ -0,0 +1,58 @@
"""MultiOn agent."""
from typing import Any, Dict, Optional
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
from langchain.agents.agent_toolkits.python.prompt import PREFIX
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.agents.types import AgentType
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema import SystemMessage
from langchain.tools.multion.tool import MultionClientTool
def create_multion_agent(
    llm: BaseLanguageModel,
    tool: MultionClientTool,
    agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    callback_manager: Optional[BaseCallbackManager] = None,
    verbose: bool = False,
    prefix: str = PREFIX,
    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> AgentExecutor:
    """Construct a MultiOn agent from an LLM and a MultiOn client tool.

    Args:
        llm: Language model that drives the agent.
        tool: The MultiOn client tool; it is the only tool given to the agent.
        agent_type: Which agent implementation to build. Only
            ``AgentType.ZERO_SHOT_REACT_DESCRIPTION`` and
            ``AgentType.OPENAI_FUNCTIONS`` are handled.
        callback_manager: Optional callback manager passed to the LLM chain
            or agent and to the executor.
        verbose: Forwarded to the ``AgentExecutor``.
        prefix: Prompt prefix (ReAct agent) or system message content
            (OpenAI functions agent). The default is the ``PREFIX`` imported
            from the Python agent toolkit's prompt module.
        agent_executor_kwargs: Extra keyword arguments for
            ``AgentExecutor.from_agent_and_tools``.
        **kwargs: Extra keyword arguments forwarded to the agent constructor.

    Returns:
        An ``AgentExecutor`` wrapping the constructed agent and the tool.

    Raises:
        ValueError: If ``agent_type`` is not one of the supported types.
    """
    tools = [tool]
    agent: BaseSingleActionAgent

    if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
        prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix)
        llm_chain = LLMChain(
            llm=llm,
            prompt=prompt,
            callback_manager=callback_manager,
        )
        # Distinct loop variable so the ``tool`` parameter is not shadowed.
        tool_names = [each_tool.name for each_tool in tools]
        agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
    elif agent_type == AgentType.OPENAI_FUNCTIONS:
        system_message = SystemMessage(content=prefix)
        _prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)
        agent = OpenAIFunctionsAgent(
            llm=llm,
            prompt=_prompt,
            tools=tools,
            callback_manager=callback_manager,
            **kwargs,
        )
    else:
        raise ValueError(f"Agent type {agent_type} not supported at the moment.")

    return AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        callback_manager=callback_manager,
        verbose=verbose,
        **(agent_executor_kwargs or {}),
    )

View File

@@ -0,0 +1 @@
"""MutliOn Client API toolkit."""

View File

@@ -0,0 +1,50 @@
"""Tool for MultiOn Extension API"""
from typing import Any, Optional
from pydantic import Field
from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.tools.base import BaseTool
from langchain.utilities.multion import MultionClientAPIWrapper
def _get_default_multion_client() -> MultionClientAPIWrapper:
    """Create the API wrapper used when a tool is built without one."""
    default_wrapper = MultionClientAPIWrapper()
    return default_wrapper
class MultionClientTool(BaseTool):
    """Simulates a Browser interacting agent.

    The tool forwards a task and a URL to ``api_wrapper.run`` and returns
    the text message that call produces.
    """

    name = "Multion_Client"
    # The fragments are joined by implicit string concatenation, so every
    # fragment except the last must end with ". " to keep sentences apart.
    description = (
        "A api to communicate with browser extension multion. "
        "Useful for automating tasks and actions in the browser. "
        "Input should be a task and a url. "
        "The result is text form of action that was executed in the given url."
    )
    # Built lazily per instance through the module-level factory.
    api_wrapper: MultionClientAPIWrapper = Field(
        default_factory=_get_default_multion_client
    )

    def _run(
        self,
        task: str,
        url: str = "https://www.google.com/",
        tabId: Optional[Any] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Args:
            task: The action to perform in the browser.
            url: The URL to perform the task on.
            tabId: Passed through to the API wrapper's ``run`` call.
            run_manager: Accepted for the tool interface; not used here.

        Returns:
            The message returned by the API wrapper.
        """
        return self.api_wrapper.run(task, url, tabId)

    async def _arun(
        self,
        task: str,
        url: str = "https://www.google.com/",
        tabId: Optional[Any] = None,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        The signature mirrors ``_run``, including the default ``url``.

        Raises:
            NotImplementedError: Always; async execution is not supported.
        """
        raise NotImplementedError("Multion Client does not support async yet")

View File

@@ -0,0 +1,75 @@
"""Util that calls MultiOn Client.
In order to set this up, follow instructions at:
https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5
"""
from typing import Any, Optional
from pydantic import BaseModel
class MultionAPI:
    """Stateful helper around the ``multion`` package's session calls.

    It remembers the ``tabId`` returned by the last ``new_session`` call and
    counts how many new sessions this instance has created.
    """

    def __init__(self) -> None:
        # Tab identifier from the latest new-session response; None until then.
        self.tabId: Optional[Any] = None
        # Number of successful ``new_session`` calls made by this instance.
        self.new_session_count = 0

    @staticmethod
    def _import_multion() -> Any:
        """Import and return the ``multion`` module with a helpful error.

        Raises:
            ImportError: If the ``multion`` package is not installed.
        """
        try:
            import multion
        except ImportError as err:
            raise ImportError(
                "Could not import the multion python package. "
                "Please install it with `pip install multion`."
            ) from err
        return multion

    def create_session(self, query: str, url: str) -> str:
        """Always the first step to run any activities that can be done using browser.

        Args:
            query: The query that you need to perform in the given url.
                If there is no query, set it as "open".
            url: The base url of a site.

        Returns:
            The ``"message"`` field of the new-session response, or a fixed
            hint to use ``update_session`` once the session limit is reached.

        Raises:
            ImportError: If a new session is needed and ``multion`` is not
                installed.
        """
        # NOTE(review): the original comment said a session is created only
        # once, but this guard allows two per instance; kept unchanged.
        if self.new_session_count >= 2:
            return "Continue using update session"

        # Imported only when a session is really created, so the
        # short-circuit path above works without the package.
        multion = self._import_multion()
        response = multion.new_session({"input": query, "url": url})
        self.new_session_count += 1
        self.tabId = response["tabId"]
        return response["message"]

    def update_session(self, query: str, url: str) -> str:
        """Updates the existing browser session.

        Updates with given action and url, used subsequently to handle browser
        activities after creating one session of browser.

        Args:
            query: The query that you need to perform in the given url.
                If there is no query, set it as "open".
            url: The base url of a site.

        Returns:
            The ``"message"`` field of the update-session response.

        Raises:
            ImportError: If ``multion`` is not installed.
        """
        multion = self._import_multion()
        # Uses whatever ``tabId`` the last ``create_session`` call stored.
        response = multion.update_session(self.tabId, {"input": query, "url": url})
        return response["message"]
class MultionClientAPIWrapper(BaseModel):
    """Wrapper for Multion Client API.

    In order to set this up, follow instructions at:
    https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5
    """

    # Session helper; replaced with a fresh one whenever a new session starts.
    client: Any = MultionAPI()

    def run(self, task: str, url: str, tabId: Optional[Any]) -> str:
        """Run a task through the Multion client and return its message.

        Args:
            task: The action to perform in the browser.
            url: The URL the task applies to.
            tabId: Only compared against ``None``; when it is ``None`` a new
                session is started. The value itself is not forwarded.

        Returns:
            The message from creating or updating the browser session.
        """
        needs_new_session = self.client.tabId is None or tabId is None
        if not needs_new_session:
            # A session exists and the caller supplied a tab id: reuse it.
            return self.client.update_session(task, url)

        # Start over with a fresh helper so its session state is reset.
        self.client = MultionAPI()
        return self.client.create_session(task, url)