diff --git a/docs/use_cases/agent_simulations.md b/docs/use_cases/agent_simulations.md
index df8b3a8b37b..45e80107db6 100644
--- a/docs/use_cases/agent_simulations.md
+++ b/docs/use_cases/agent_simulations.md
@@ -8,6 +8,9 @@ Agent simulations generally involve two main components:
 
 Specific implementations of agent simulations (or parts of agent simulations) include
 
+## Simulations with One Agent
+- [Simulated Environment: Gymnasium](agent_simulations/gymnasium.ipynb): an example of how to create a simple agent-environment interaction loop with [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) (formerly [OpenAI Gym](https://github.com/openai/gym)).
+
 ## Simulations with Two Agents
 - [CAMEL](agent_simulations/camel_role_playing.ipynb): an implementation of the CAMEL (Communicative Agents for “Mind” Exploration of Large Scale Language Model Society) paper, where two agents communicate with each other.
 - [Two Player D&D](agent_simulations/two_player_dnd.ipynb): an example of how to use a generic simulator for two agents to implement a variant of the popular Dungeons & Dragons role playing game.
diff --git a/docs/use_cases/agent_simulations/gymnasium.ipynb b/docs/use_cases/agent_simulations/gymnasium.ipynb
new file mode 100644
index 00000000000..64094bd8480
--- /dev/null
+++ b/docs/use_cases/agent_simulations/gymnasium.ipynb
@@ -0,0 +1,277 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4b089493",
+   "metadata": {},
+   "source": [
+    "# Simulated Environment: Gymnasium\n",
+    "\n",
+    "For many applications of LLM agents, the environment is real (internet, database, REPL, etc.). However, we can also define agents to interact in simulated environments like text-based games. This is an example of how to create a simple agent-environment interaction loop with [Gymnasium](https://github.com/Farama-Foundation/Gymnasium) (formerly [OpenAI Gym](https://github.com/openai/gym))."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "id": "f9bd38b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gymnasium as gym\n",
+    "import tenacity\n",
+    "\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.schema import (\n",
+    "    AIMessage,\n",
+    "    HumanMessage,\n",
+    "    SystemMessage,\n",
+    "    BaseMessage,\n",
+    ")\n",
+    "from langchain.output_parsers import RegexParser"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e222e811",
+   "metadata": {},
+   "source": [
+    "## Define the agent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "870c24bc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_docs(env):\n",
+    "    # Unwrap any wrappers to reach the underlying environment's docstring\n",
+    "    while 'env' in dir(env):\n",
+    "        env = env.env\n",
+    "    return env.__doc__\n",
+    "\n",
+    "class Agent:\n",
+    "    def __init__(self, model, env):\n",
+    "        self.model = model\n",
+    "        self.docs = get_docs(env)\n",
+    "        \n",
+    "        self.instructions = \"\"\"\n",
+    "Your goal is to maximize your return, i.e. the sum of the rewards you receive.\n",
+    "I will give you an observation, reward, termination flag, truncation flag, and the return so far, formatted as:\n",
+    "\n",
+    "Observation: <observation>\n",
+    "Reward: <reward>\n",
+    "Termination: <termination>\n",
+    "Truncation: <truncation>\n",
+    "Return: <return>\n",
+    "\n",
+    "You will respond with an action, formatted as:\n",
+    "\n",
+    "Action: <action>\n",
+    "\n",
+    "where you replace <action> with your actual action.\n",
+    "Do nothing else but return the action.\n",
+    "\"\"\"\n",
+    "        self.action_parser = RegexParser(\n",
+    "            regex=r\"Action: (.*)\",\n",
+    "            output_keys=['action'],\n",
+    "            default_output_key='action')\n",
+    "        \n",
+    "        self.message_history = []\n",
+    "        self.ret = 0\n",
+    "        \n",
+    "    def reset(self, obs):\n",
+    "        self.message_history = [\n",
+    "            SystemMessage(content=self.docs),\n",
+    "            SystemMessage(content=self.instructions),\n",
+    "        ]\n",
+    "        obs_message = f\"\"\"\n",
+    "Observation: {obs}\n",
+    "Reward: 0\n",
+    "Termination: False\n",
+    "Truncation: False\n",
+    "Return: 0\n",
+    "    \"\"\"\n",
+    "        self.message_history.append(HumanMessage(content=obs_message))\n",
+    "        return obs_message\n",
+    "    \n",
+    "    def observe(self, obs, rew, term, trunc, info):\n",
+    "        self.ret += rew\n",
+    "        \n",
+    "        obs_message = f\"\"\"\n",
+    "Observation: {obs}\n",
+    "Reward: {rew}\n",
+    "Termination: {term}\n",
+    "Truncation: {trunc}\n",
+    "Return: {self.ret}\n",
+    "    \"\"\"\n",
+    "        self.message_history.append(HumanMessage(content=obs_message))\n",
+    "        return obs_message\n",
+    "    \n",
+    "    @tenacity.retry(stop=tenacity.stop_after_attempt(2),\n",
+    "                    wait=tenacity.wait_none(),  # No waiting time between retries\n",
+    "                    retry=tenacity.retry_if_exception_type(ValueError),\n",
+    "                    before_sleep=lambda retry_state: print(f\"ValueError occurred: {retry_state.outcome.exception()}, retrying...\"),\n",
+    "                    retry_error_callback=lambda retry_state: 0)  # Default value when all retries are exhausted\n",
+    "    def act(self):\n",
+    "        act_message = self.model(self.message_history)\n",
+    "        self.message_history.append(act_message)\n",
+    "        action = int(self.action_parser.parse(act_message.content)['action'])\n",
+    "        return action"
+   ]
+  },
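+  {
+   "cell_type": "markdown",
+   "id": "f3a9c2d1",
+   "metadata": {},
+   "source": [
+    "As an optional sanity check on the message protocol above, the cell below shows how the `RegexParser` pulls the action out of a well-formed reply and how the agent casts it to an `int` for `env.step`. The reply string here is a hypothetical example, not real model output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7c31e09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# \"Action: 1\" is a hypothetical, well-formed model reply\n",
+    "sample_reply = \"Action: 1\"\n",
+    "parser = RegexParser(regex=r\"Action: (.*)\", output_keys=['action'], default_output_key='action')\n",
+    "int(parser.parse(sample_reply)['action'])  # parse returns {'action': '1'}, so this yields 1"
+   ]
+  },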
+  {
+   "cell_type": "markdown",
+   "id": "2e76d22c",
+   "metadata": {},
+   "source": [
+    "## Initialize the simulated environment and agent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "id": "9e902cfd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "env = gym.make(\"Blackjack-v1\")\n",
+    "agent = Agent(model=ChatOpenAI(temperature=0.2), env=env)"
+   ]
+  },
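+  {
+   "cell_type": "markdown",
+   "id": "5d8e1b4f",
+   "metadata": {},
+   "source": [
+    "Before running the loop, it can help to peek at what the agent will be conditioned on: the environment docstring that seeds the system message, and the observation/action spaces. In `Blackjack-v1` the observation is (player sum, dealer card, usable ace) and the two actions are 0 (stick) and 1 (hit).\n",
+    "\n",
+    "This optional inspection cell assumes nothing beyond the standard Gymnasium `observation_space`/`action_space` attributes, so any environment would work here."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b2f6c3a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(env.observation_space)  # Tuple(Discrete(32), Discrete(11), Discrete(2))\n",
+    "print(env.action_space)  # Discrete(2)\n",
+    "print(get_docs(env)[:120])  # start of the docstring the agent sees"
+   ]
+  },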
+  {
+   "cell_type": "markdown",
+   "id": "e2c12b15",
+   "metadata": {},
+   "source": [
+    "## Main loop"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "ad361210",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Observation: (10, 3, 0)\n",
+      "Reward: 0\n",
+      "Termination: False\n",
+      "Truncation: False\n",
+      "Return: 0\n",
+      "    \n",
+      "Action: 1\n",
+      "\n",
+      "Observation: (18, 3, 0)\n",
+      "Reward: 0.0\n",
+      "Termination: False\n",
+      "Truncation: False\n",
+      "Return: 0.0\n",
+      "    \n",
+      "Action: 0\n",
+      "\n",
+      "Observation: (18, 3, 0)\n",
+      "Reward: 1.0\n",
+      "Termination: True\n",
+      "Truncation: False\n",
+      "Return: 1.0\n",
+      "    \n",
+      "break True False\n"
+     ]
+    }
+   ],
+   "source": [
+    "observation, info = env.reset()\n",
+    "obs_message = agent.reset(observation)\n",
+    "print(obs_message)\n",
+    "while True:\n",
+    "    action = agent.act()\n",
+    "    observation, reward, termination, truncation, info = env.step(action)\n",
+    "    obs_message = agent.observe(observation, reward, termination, truncation, info)\n",
+    "    print(f'Action: {action}')\n",
+    "    print(obs_message)\n",
+    "    if termination or truncation:\n",
+    "        print('break', termination, truncation)\n",
+    "        break\n",
+    "env.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "58a13e9c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}