mirror of
https://github.com/hwchase17/langchain.git
synced 2025-10-23 19:44:05 +00:00
189 lines
4.3 KiB
Plaintext
189 lines
4.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "cd835d40",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Multi-modal outputs: Image & Text"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "fa88e03a",
|
|
"metadata": {},
|
|
"source": [
|
|
"This notebook shows how tools that produce non-text output can be used to create multi-modal agents.\n",
|
|
"\n",
|
|
"This example is limited to text and image outputs and uses UUIDs to transfer content across tools and agents. \n",
|
|
"\n",
|
|
"This example uses Steamship to generate and store images. Generated images are auth-protected by default. \n",
|
|
"\n",
|
|
"You can get your Steamship API key here: https://steamship.com/account/api"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0653da01",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from steamship import Block, Steamship\n",
|
|
"import re\n",
|
|
"from IPython.display import Image"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f6933033",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.llms import OpenAI\n",
|
|
"from langchain.agents import initialize_agent\n",
|
|
"from langchain.agents import AgentType\n",
|
|
"from langchain.tools import SteamshipImageGenerationTool"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "71e51e53",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"llm = OpenAI(temperature=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a9fc769d",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Dall-E "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cd177dfe",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tools = [SteamshipImageGenerationTool(model_name=\"dall-e\")]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c71b1e46",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"mrkl = initialize_agent(\n",
|
|
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "603aeb9a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"output = mrkl.run(\"How would you visualize a parot playing soccer?\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "25eb4efe",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def show_output(output):\n",
|
|
" \"\"\"Display the multi-modal output from the agent.\"\"\"\n",
|
|
" UUID_PATTERN = re.compile(\n",
|
|
" r\"([0-9A-Za-z]{8}-[0-9A-Za-z]{4}-[0-9A-Za-z]{4}-[0-9A-Za-z]{4}-[0-9A-Za-z]{12})\"\n",
|
|
" )\n",
|
|
"\n",
|
|
" outputs = UUID_PATTERN.split(output)\n",
|
|
" outputs = [\n",
|
|
" re.sub(r\"^\\W+\", \"\", el) for el in outputs\n",
|
|
" ] # Strip leading non-word characters from each piece\n",
|
|
"\n",
|
|
" for output in outputs:\n",
|
|
" maybe_block_id = UUID_PATTERN.search(output)\n",
|
|
" if maybe_block_id:\n",
|
|
" display(Image(Block.get(Steamship(), _id=maybe_block_id.group()).raw()))\n",
|
|
" else:\n",
|
|
" print(output, end=\"\\n\\n\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e247b2c4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## StableDiffusion "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "315025e7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tools = [SteamshipImageGenerationTool(model_name=\"stable-diffusion\")]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7930064a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"mrkl = initialize_agent(\n",
|
|
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "611a833d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"output = mrkl.run(\"How would you visualize a parot playing soccer?\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|