Harrison/add react chain (#24)

from https://arxiv.org/abs/2210.03629

still need to think if docstore abstraction makes sense
This commit is contained in:
Harrison Chase 2022-10-26 21:02:23 -07:00 committed by GitHub
parent 61a51b7a76
commit ce7b14b843
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 616 additions and 0 deletions

84
examples/react.ipynb Normal file
View File

@ -0,0 +1,84 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "4e272b47",
"metadata": {},
"outputs": [],
"source": [
"from langchain import OpenAI, ReActChain, Wikipedia\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"react = ReActChain(llm=llm, docstore=Wikipedia())"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8078c8f1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Search Scott Derrickson\n",
"Could not find [Scott Derrickson]. Similar: ['Scott Derrickson', 'The Black Phone', 'Sinister (film)', 'Doctor Strange (2016 film)', 'The Day the Earth Stood Still (2008 film)', 'The Exorcism of Emily Rose', 'Deliver Us from Evil (2014 film)', 'C. Robert Cargill', 'Scott Teems', 'Sean Harris']\n",
"Search Scott Derrickson (film director)\n",
"Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for directing the films The Exorcism of Emily Rose (2005), The Day the Earth Stood Still (2008), Sinister (2012), Deliver Us from Evil (2014), Doctor Strange (2016), and The Black Phone (2021).\n",
"Search Ed Wood\n",
"Edward Davis Wood Jr. (October 10, 1924 December 10, 1978) was an American filmmaker, \n",
"actor, and pulp novel author.\n",
"In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959). In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 pulp crime and sex novels.\n",
"Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D. Wood Jr., a biopic of his life, Ed Wood (1994), was directed by Tim Burton. Starring Johnny Depp as Wood and Martin Landau as Bela Lugosi, the film received critical acclaim and various awards, including two Academy Awards.\n",
"Finish yes\n"
]
},
{
"data": {
"text/plain": [
"'yes'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"question = \"Were Scott Derrickson and Ed Wood of the same nationality?\"\n",
"react.run(question)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a6bd3b4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -9,9 +9,11 @@ from langchain.chains import (
LLMChain,
LLMMathChain,
PythonChain,
ReActChain,
SelfAskWithSearchChain,
SerpAPIChain,
)
from langchain.docstore import Wikipedia
from langchain.llms import Cohere, HuggingFaceHub, OpenAI
from langchain.prompt import Prompt
@ -24,5 +26,7 @@ __all__ = [
"Cohere",
"OpenAI",
"Prompt",
"ReActChain",
"Wikipedia",
"HuggingFaceHub",
]

View File

@ -2,6 +2,7 @@
from langchain.chains.llm import LLMChain
from langchain.chains.llm_math.base import LLMMathChain
from langchain.chains.python import PythonChain
from langchain.chains.react.base import ReActChain
from langchain.chains.self_ask_with_search.base import SelfAskWithSearchChain
from langchain.chains.serpapi import SerpAPIChain
@ -11,4 +12,5 @@ __all__ = [
"PythonChain",
"SelfAskWithSearchChain",
"SerpAPIChain",
"ReActChain",
]

View File

@ -0,0 +1 @@
"""Implements the ReAct paper from https://arxiv.org/pdf/2210.03629.pdf."""

View File

@ -0,0 +1,116 @@
"""Chain that implements the ReAct paper from https://arxiv.org/pdf/2210.03629.pdf."""
import re
from typing import Any, Dict, List, Tuple
from pydantic import BaseModel, Extra
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.chains.react.prompt import PROMPT
from langchain.docstore.base import Docstore
from langchain.llms.base import LLM
def predict_until_observation(
    llm_chain: LLMChain, prompt: str, i: int
) -> Tuple[str, str, str]:
    """Generate text until an observation is needed.

    Returns the generated text together with the parsed action name and the
    directive found inside its square brackets.
    """
    expected_prefix = f"Action {i}: "
    stop_seq = f"\nObservation {i}:"
    generated = llm_chain.predict(input=prompt, stop=[stop_seq])
    # Sometimes the LLM forgets to take an action, so we prompt it to by
    # appending the action header ourselves and asking for a continuation.
    while not generated.split("\n")[-1].startswith(expected_prefix):
        generated += f"\nAction {i}:"
        generated += llm_chain.predict(input=prompt + generated, stop=[stop_seq])
    # The action block should be the last line, e.g. "Action 1: Search[foo]".
    final_line = generated.split("\n")[-1]
    directive_text = final_line[len(expected_prefix) :]
    parsed = re.search(r"(.*?)\[(.*?)\]", directive_text)
    if parsed is None:
        raise ValueError(f"Could not parse action directive: {directive_text}")
    return generated, parsed.group(1), parsed.group(2)
class ReActChain(Chain, BaseModel):
    """Chain that implements the ReAct paper.

    Interleaves LLM-generated Thought/Action steps with docstore
    observations (Search/Lookup) until the LLM emits a Finish action.

    Example:
        .. code-block:: python

            from langchain import ReActChain, OpenAI, Wikipedia

            react = ReActChain(llm=OpenAI(), docstore=Wikipedia())
    """

    llm: LLM
    """LLM wrapper to use."""
    docstore: Docstore
    """Docstore to use."""
    input_key: str = "question"  #: :meta private:
    output_key: str = "answer"  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Expect input key.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Expect output key.

        :meta private:
        """
        return ["full_logic", self.output_key]

    def _run(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        # Drive the ReAct loop: generate text up to the next action, execute
        # the action against the docstore, append the observation, repeat.
        question = inputs[self.input_key]
        llm_chain = LLMChain(llm=self.llm, prompt=PROMPT)
        prompt = f"{question}\nThought 1:"
        i = 1
        # Most recently found page; Lookup only works after a successful Search.
        document = None
        while True:
            ret_text, action, directive = predict_until_observation(
                llm_chain, prompt, i
            )
            prompt += ret_text
            print(action, directive)
            if action == "Search":
                observation, document = self.docstore.search(directive)
                print(observation)
            elif action == "Lookup":
                if document is None:
                    raise ValueError("Cannot lookup without a successful search first")
                observation = document.lookup(directive)
            elif action == "Finish":
                # The Finish directive is the final answer.
                return {"full_logic": prompt, self.output_key: directive}
            else:
                raise ValueError(f"Got unknown action directive: {action}")
            prompt += f"\nObservation {i}: " + observation + f"\nThought {i + 1}:"
            i += 1

    def run(self, question: str) -> str:
        """Run ReAct framework.

        Args:
            question: Question to be answered.

        Returns:
            Final answer from thinking through the ReAct framework.

        Example:
            .. code-block:: python

                question = "Were Scott Derrickson and Ed Wood of the same nationality?"
                answer = react.run(question)
        """
        return self({self.input_key: question})[self.output_key]

View File

@ -0,0 +1,118 @@
# flake8: noqa
from langchain.prompt import Prompt
# Few-shot examples from the ReAct paper (https://arxiv.org/abs/2210.03629)
# demonstrating the Thought / Action / Observation loop with the three
# actions the chain understands: Search[...], Lookup[...] and Finish[...].
_DEFAULT_TEMPLATE = """Question: What is the elevation range for the area that the eastern sector of the
Colorado orogeny extends into?
Thought 1: I need to search Colorado orogeny, find the area that the eastern sector
of the Colorado orogeny extends into, then find the elevation range of the
area.
Action 1: Search[Colorado orogeny]
Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in
Colorado and surrounding areas.
Thought 2: It does not mention the eastern sector. So I need to look up eastern
sector.
Action 2: Lookup[eastern sector]
Observation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called
the Central Plains orogeny.
Thought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I
need to search High Plains and find its elevation range.
Action 3: Search[High Plains]
Observation 3: High Plains refers to one of two distinct land regions
Thought 4: I need to instead search High Plains (United States).
Action 4: Search[High Plains (United States)]
Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the
High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130
m).[3]
Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer
is 1,800 to 7,000 ft.
Action 5: Finish[1,800 to 7,000 ft]
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons"
character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after
who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated
television series The Simpsons voiced by Pamela Hayden and created by Matt
Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up
"named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose
middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is
Richard Nixon.
Action 3: Finish[Richard Nixon]
Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The
Saimaa Gesture?
Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which
documentary is about Finnish rock groups.
Action 1: Search[Adam Clayton Powell]
Observation 1 Could not find [Adam Clayton Powell]. Similar: [Adam Clayton Powell
III, Seventh Avenue (Manhattan), Adam Clayton Powell Jr. State Office
Building, Isabel Washington Powell, Adam Powell, Adam Clayton Powell
(film), Giancarlo Esposito].
Thought 2: To find the documentary, I can search Adam Clayton Powell (film).
Action 2: Search[Adam Clayton Powell (film)]
Observation 2: Adam Clayton Powell is a 1989 American documentary film directed by
Richard Kilberg. The film is about the rise and fall of influential
African-American politician Adam Clayton Powell Jr.[3][4] It was later aired
as part of the PBS series The American Experience.
Thought 3: Adam Clayton Powell (film) is a documentary about an African-American
politician, not Finnish rock groups. So the documentary about Finnish rock
groups must instead be The Saimaa Gesture.
Action 3: Finish[The Saimaa Gesture]
Question: What profession does Nicholas Ray and Elia Kazan have in common?
Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then
find the profession they have in common.
Action 1: Search[Nicholas Ray]
Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 - June 16,
1979) was an American film director, screenwriter, and actor best known for
the 1955 film Rebel Without a Cause.
Thought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need
to search Elia Kazan next and find his professions.
Action 2: Search[Elia Kazan]
Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter
and actor.
Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor.
So profession Nicholas Ray and Elia Kazan have in common is director,
screenwriter, and actor.
Action 3: Finish[director, screenwriter, actor]
Question: Which magazine was started first Arthurs Magazine or First for Women?
Thought 1: I need to search Arthurs Magazine and First for Women, and find which was
started first.
Action 1: Search[Arthurs Magazine]
Observation 1: Arthurs Magazine (1844-1846) was an American literary periodical published
in Philadelphia in the 19th century.
Thought 2: Arthurs Magazine was started in 1844. I need to search First for Women
next.
Action 2: Search[First for Women]
Observation 2: First for Women is a womans magazine published by Bauer Media Group in the
USA.[1] The magazine was started in 1989.
Thought 3: First for Women was started in 1989. 1844 (Arthurs Magazine) < 1989 (First
for Women), so Arthurs Magazine was started first.
Action 3: Finish[Arthurs Magazine]
Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?
Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work,
then find if they are the same.
Action 1: Search[Pavel Urysohn]
Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 - August 17, 1924) was a Soviet
mathematician who is best known for his contributions in dimension theory.
Thought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and
find its type of work.
Action 2: Search[Leonid Levin]
Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer
scientist.
Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn
and Leonid Levin have the same type of work.
Action 3: Finish[yes]
Question: {input}"""

# The runtime question is interpolated into the few-shot examples above.
PROMPT = Prompt(
    input_variables=["input"],
    template=_DEFAULT_TEMPLATE,
)

View File

@ -0,0 +1,4 @@
"""Wrappers on top of docstores."""
from langchain.docstore.wikipedia import Wikipedia
__all__ = ["Wikipedia"]

View File

@ -0,0 +1,17 @@
"""Interface to access to place that stores documents."""
from abc import ABC, abstractmethod
from typing import Optional, Tuple
from langchain.docstore.document import Document
class Docstore(ABC):
    """Interface to access to place that stores documents."""

    @abstractmethod
    def search(self, search: str) -> Tuple[str, Optional[Document]]:
        """Search for document.

        If page exists, return the page summary, and a Document object.
        If page does not exist, return similar entries.
        """

View File

@ -0,0 +1,38 @@
"""Interface for interacting with a document."""
from typing import List
from pydantic import BaseModel
class Document(BaseModel):
    """Interface for interacting with a document."""

    page_content: str
    # Term currently being looked up (stored lowercased); "" means none yet.
    lookup_str: str = ""
    # Index of the next matching paragraph for the current lookup term.
    # Annotated explicitly so pydantic and type checkers treat this as a
    # model field instead of relying on inference from the bare default.
    lookup_index: int = 0

    @property
    def paragraphs(self) -> List[str]:
        """Paragraphs of the page (split on blank lines)."""
        return self.page_content.split("\n\n")

    @property
    def summary(self) -> str:
        """Summary of the page (the first paragraph)."""
        return self.paragraphs[0]

    def lookup(self, string: str) -> str:
        """Lookup a term in the page, imitating cmd-F functionality.

        Repeating a lookup of the same term pages through successive
        matching paragraphs; looking up a new term resets to the first match.
        """
        if string.lower() != self.lookup_str:
            # New term: restart from the first match.
            self.lookup_str = string.lower()
            self.lookup_index = 0
        else:
            # Same term again: advance to the next match.
            self.lookup_index += 1
        lookups = [p for p in self.paragraphs if self.lookup_str in p.lower()]
        if len(lookups) == 0:
            return "No Results"
        elif self.lookup_index >= len(lookups):
            return "No More Results"
        else:
            result_prefix = f"(Result {self.lookup_index + 1}/{len(lookups)})"
            return f"{result_prefix} {lookups[self.lookup_index]}"

View File

@ -0,0 +1,45 @@
"""Wrapper around wikipedia API."""
from typing import Optional, Tuple
from langchain.docstore.base import Docstore
from langchain.docstore.document import Document
class Wikipedia(Docstore):
    """Wrapper around wikipedia API."""

    def __init__(self) -> None:
        """Check that wikipedia package is installed."""
        try:
            import wikipedia  # noqa: F401
        except ImportError:
            # Typo fixed in the message ("Please it install it").
            raise ValueError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            )

    def search(self, search: str) -> Tuple[str, Optional[Document]]:
        """Try to search for wiki page.

        If page exists, return the page summary, and a Document object.
        If page does not exist, return similar entries.
        """
        import wikipedia

        try:
            page_content = wikipedia.page(search).content
            wiki_page = Document(page_content=page_content)
            observation = wiki_page.summary
        except (wikipedia.PageError, wikipedia.DisambiguationError):
            # Missing page and ambiguous title are handled identically:
            # report the closest matches so the caller can retry.
            wiki_page = None
            observation = (
                f"Could not find [{search}]. " f"Similar: {wikipedia.search(search)}"
            )
        return observation, wiki_page

View File

@ -10,6 +10,7 @@ cohere
openai
google-search-results
playwright
wikipedia
huggingface_hub
# For development
jupyter

View File

@ -0,0 +1,13 @@
"""Integration test for the ReAct chain."""
from langchain.chains.react.base import ReActChain
from langchain.llms.openai import OpenAI
def test_react() -> None:
    """Test functionality on a prompt."""
    from langchain.docstore import Wikipedia

    llm = OpenAI(temperature=0)
    # ReActChain declares `docstore` as a required field (Extra.forbid, no
    # default), so constructing it without one fails validation.
    react = ReActChain(llm=llm, docstore=Wikipedia())
    question = "Were Scott Derrickson and Ed Wood of the same nationality?"
    output = react.run(question)
    assert output == "yes"

View File

@ -0,0 +1,113 @@
"""Unit tests for ReAct."""
from typing import List, Optional, Tuple
import pytest
from langchain.chains.llm import LLMChain
from langchain.chains.react.base import ReActChain, predict_until_observation
from langchain.docstore.base import Docstore
from langchain.docstore.document import Document
from langchain.llms.base import LLM
from langchain.prompt import Prompt
# Fixture page: four paragraphs separated by blank lines. Document.paragraphs
# splits on "\n\n", and test_react_chain below expects the lookup of "made"
# to return exactly "(Result 1/1) Made in 2022." — which requires each
# sentence to be its own paragraph.
_PAGE_CONTENT = """This is a page about LangChain.

It is a really cool framework.

What isn't there to love about langchain?

Made in 2022."""

# Identity prompt: the fake LLM's canned outputs are used verbatim.
_FAKE_PROMPT = Prompt(input_variables=["input"], template="{input}")
class FakeListLLM(LLM):
    """Fake LLM for testing that outputs elements of a list."""

    def __init__(self, responses: List[str]):
        """Store the canned responses to be emitted in order."""
        self.responses = responses
        self.i = -1

    def __call__(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Advance to the next canned response and return it (prompt ignored)."""
        self.i = self.i + 1
        next_response = self.responses[self.i]
        return next_response
class FakeDocstore(Docstore):
    """Fake docstore for testing purposes."""

    def search(self, search: str) -> Tuple[str, Optional[Document]]:
        """Return the fixture document regardless of the query."""
        doc = Document(page_content=_PAGE_CONTENT)
        return doc.summary, doc
def test_predict_until_observation_normal() -> None:
    """Test predict_until_observation when observation is made normally."""
    canned = ["foo\nAction 1: search[foo]"]
    chain = LLMChain(llm=FakeListLLM(canned), prompt=_FAKE_PROMPT)
    full_text, action, directive = predict_until_observation(chain, "", 1)
    assert full_text == canned[0]
    assert action == "search"
    assert directive == "foo"
def test_predict_until_observation_repeat() -> None:
    """Test when no action is generated initially."""
    # First response lacks an action line; the second completes it.
    chain = LLMChain(llm=FakeListLLM(["foo", " search[foo]"]), prompt=_FAKE_PROMPT)
    full_text, action, directive = predict_until_observation(chain, "", 1)
    assert full_text == "foo\nAction 1: search[foo]"
    assert action == "search"
    assert directive == "foo"
def test_predict_until_observation_error() -> None:
    """Test handling of generation of text that cannot be parsed."""
    # "foo" after the action prefix has no [directive] brackets to parse.
    chain = LLMChain(llm=FakeListLLM(["foo\nAction 1: foo"]), prompt=_FAKE_PROMPT)
    with pytest.raises(ValueError):
        predict_until_observation(chain, "", 1)
def test_react_chain() -> None:
    """Test react chain."""
    fake_llm = FakeListLLM(
        [
            "I should probably search\nAction 1: Search[langchain]",
            "I should probably lookup\nAction 2: Lookup[made]",
            "Ah okay now I know the answer\nAction 3: Finish[2022]",
        ]
    )
    chain = ReActChain(llm=fake_llm, docstore=FakeDocstore())
    output = chain({"question": "when was langchain made"})
    assert output["answer"] == "2022"
    # The full transcript interleaves thoughts, actions and observations.
    expected_transcript = "\n".join(
        [
            "when was langchain made",
            "Thought 1:I should probably search",
            "Action 1: Search[langchain]",
            "Observation 1: This is a page about LangChain.",
            "Thought 2:I should probably lookup",
            "Action 2: Lookup[made]",
            "Observation 2: (Result 1/1) Made in 2022.",
            "Thought 3:Ah okay now I know the answer",
            "Action 3: Finish[2022]",
        ]
    )
    assert output["full_logic"] == expected_transcript
def test_react_chain_bad_action() -> None:
    """Test react chain when bad action given."""
    fake_llm = FakeListLLM(
        ["I should probably search\nAction 1: BadAction[langchain]"]
    )
    chain = ReActChain(llm=fake_llm, docstore=FakeDocstore())
    # "BadAction" is not Search/Lookup/Finish, so the chain must raise.
    with pytest.raises(ValueError):
        chain.run("when was langchain made")

View File

@ -0,0 +1 @@
"""Test functionality related to the docstore objects."""

View File

@ -0,0 +1,59 @@
"""Test document functionality."""
from langchain.docstore.document import Document
_PAGE_CONTENT = """This is a page about LangChain.
It is a really cool framework.
What isn't there to love about langchain?
Made in 2022."""
def test_document_summary() -> None:
    """Test that we extract the summary okay."""
    doc = Document(page_content=_PAGE_CONTENT)
    # Summary is defined as the first paragraph.
    assert doc.summary == "This is a page about LangChain."
def test_document_lookup() -> None:
    """Test that can lookup things okay."""
    doc = Document(page_content=_PAGE_CONTENT)
    # First lookup of "LangChain" returns the first of its two matches.
    assert doc.lookup("LangChain") == "(Result 1/2) This is a page about LangChain."
    # Switching to a different term resets the lookup state.
    assert doc.lookup("framework") == "(Result 1/1) It is a really cool framework."
    # Returning to "LangChain" starts over from its first match...
    assert doc.lookup("LangChain") == "(Result 1/2) This is a page about LangChain."
    # ...and repeating it advances to the second match.
    assert (
        doc.lookup("LangChain")
        == "(Result 2/2) What isn't there to love about langchain?"
    )
def test_document_lookups_dont_exist() -> None:
    """Test lookup on term that doesn't exist in the document."""
    doc = Document(page_content=_PAGE_CONTENT)
    # "harrison" never appears in the fixture page.
    assert doc.lookup("harrison") == "No Results"
def test_document_lookups_too_many() -> None:
    """Test lookup on term too many times."""
    doc = Document(page_content=_PAGE_CONTENT)
    # "framework" has exactly one match.
    assert doc.lookup("framework") == "(Result 1/1) It is a really cool framework."
    # A repeat lookup exhausts the single match.
    assert doc.lookup("framework") == "No More Results"