This commit is contained in:
Harrison Chase 2022-11-12 06:47:05 -08:00
parent 903d6315d5
commit eb54c0b847
11 changed files with 121 additions and 174 deletions

View File

@ -0,0 +1,6 @@
[
{
"question": "What is 37593 * 67?",
"answer": "```python\nprint(37593 * 67)\n```\n```output\n2518731\n```\nAnswer: 2518731"
}
]

View File

@ -1,7 +1,7 @@
# flake8: noqa # flake8: noqa
from langchain.prompts.prompt import Prompt from langchain.prompts.prompt import Prompt
_PROMPT_TEMPLATE = """You are GPT-3, and you can't do math. _PREFIX = """You are GPT-3, and you can't do math.
You can do basic math, and your memorization abilities are impressive, but you can't do any complex calculations that a human could not do in their head. You also have an annoying tendency to just make up highly specific, but wrong, answers. You can do basic math, and your memorization abilities are impressive, but you can't do any complex calculations that a human could not do in their head. You also have an annoying tendency to just make up highly specific, but wrong, answers.
@ -21,18 +21,28 @@ Otherwise, use this simpler format:
Question: ${{Question without hard calculation}} Question: ${{Question without hard calculation}}
Answer: ${{Answer}} Answer: ${{Answer}}
Begin. Begin."""
Question: What is 37593 * 67? from pathlib import Path
```python from langchain.prompts.data import BaseExample
print(37593 * 67)
```
```output
2518731
```
Answer: 2518731
Question: {question}""" example_path = Path(__file__).parent / "examples.json"
import json
PROMPT = Prompt(input_variables=["question"], template=_PROMPT_TEMPLATE)
class LLMMathExample(BaseExample):
    """A single worked question/answer pair for the LLM math prompt.

    Instances are built from the entries of ``examples.json`` and handed to
    ``Prompt.from_examples`` as few-shot examples.
    """

    # The question text, rendered after the "Question: " label.
    question: str
    # The worked answer text that follows the question.
    answer: str

    def formatted(self) -> str:
        """Return the example as ``Question: <question>``, a blank line, then the answer."""
        return "Question: {}\n\n{}".format(self.question, self.answer)
# Load the few-shot examples that ship next to this module. JSON text is
# UTF-8 by specification, so the encoding is stated explicitly instead of
# depending on the platform's locale default.
with open(example_path, encoding="utf-8") as f:
    raw_examples = json.load(f)

# Each JSON object supplies the keyword arguments for one LLMMathExample.
examples = [LLMMathExample(**example) for example in raw_examples]

# Final prompt: the instruction prefix, the loaded examples, then the slot
# for the caller's question.
PROMPT = Prompt.from_examples(
    examples, "Question: {question}", ["question"], prefix=_PREFIX
)

View File

@ -0,0 +1,26 @@
[
{
"question": "What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?",
"answer": "Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of that area.\nAction 1: Search[Colorado orogeny]\nObservation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\nThought 2: It does not mention the eastern sector. So I need to look up eastern sector.\nAction 2: Lookup[eastern sector]\nObservation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\nThought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I need to search High Plains and find its elevation range.\nAction 3: Search[High Plains]\nObservation 3: High Plains refers to one of two distinct land regions\nThought 4: I need to instead search High Plains (United States).\nAction 4: Search[High Plains (United States)]\nObservation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\nThought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\nAction 5: Finish[1,800 to 7,000 ft]"
},
{
"question": "Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?",
"answer": "Thought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.\nAction 1: Search[Milhouse]\nObservation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\nThought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\".\nAction 2: Lookup[named after]\nObservation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.\nThought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\nAction 3: Finish[Richard Nixon]"
},
{
"question": "Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?",
"answer": "Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\nAction 1: Search[Adam Clayton Powell]\nObservation 1: Could not find [Adam Clayton Powell]. Similar: [Adam Clayton Powell III, Seventh Avenue (Manhattan), Adam Clayton Powell Jr. State Office Building, Isabel Washington Powell, Adam Powell, Adam Clayton Powell (film), Giancarlo Esposito].\nThought 2: To find the documentary, I can search Adam Clayton Powell (film).\nAction 2: Search[Adam Clayton Powell (film)]\nObservation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg. The film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\nThought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\nAction 3: Finish[The Saimaa Gesture]"
},
{
"question": "What profession does Nicholas Ray and Elia Kazan have in common?",
"answer": "Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\nAction 1: Search[Nicholas Ray]\nObservation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 - June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\nThought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.\nAction 2: Search[Elia Kazan]\nObservation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\nThought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\nAction 3: Finish[director, screenwriter, actor]"
},
{
"question": "Which magazine was started first Arthurs Magazine or First for Women?",
"answer": "Thought 1: I need to search Arthurs Magazine and First for Women, and find which was started first.\nAction 1: Search[Arthurs Magazine]\nObservation 1: Arthurs Magazine (1844-1846) was an American literary periodical published in Philadelphia in the 19th century.\nThought 2: Arthurs Magazine was started in 1844. I need to search First for Women next.\nAction 2: Search[First for Women]\nObservation 2: First for Women is a womans magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989.\nThought 3: First for Women was started in 1989. 1844 (Arthurs Magazine) < 1989 (First for Women), so Arthurs Magazine was started first.\nAction 3: Finish[Arthurs Magazine]"
},
{
"question": "Were Pavel Urysohn and Leonid Levin known for the same type of work?",
"answer": "Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\nAction 1: Search[Pavel Urysohn]\nObservation 1: Pavel Samuilovich Urysohn (February 3, 1898 - August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\nThought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and find its type of work.\nAction 2: Search[Leonid Levin]\nObservation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist.\nThought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work.\nAction 3: Finish[yes]"
}
]

View File

@ -1,116 +1,29 @@
# flake8: noqa # flake8: noqa
from pathlib import Path
from langchain.prompts.data import BaseExample
from langchain.prompts.prompt import Prompt from langchain.prompts.prompt import Prompt
EXAMPLES = [ example_path = Path(__file__).parent / "examples.json"
"""Question: What is the elevation range for the area that the eastern sector of the import json
Colorado orogeny extends into?
Thought 1: I need to search Colorado orogeny, find the area that the eastern sector
of the Colorado orogeny extends into, then find the elevation range of the class ReActExample(BaseExample):
area. question: str
Action 1: Search[Colorado orogeny] answer: str
Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in
Colorado and surrounding areas. def formatted(self) -> str:
Thought 2: It does not mention the eastern sector. So I need to look up eastern return f"Question: {self.question}\n{self.answer}"
sector.
Action 2: Lookup[eastern sector]
Observation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called with open(example_path) as f:
the Central Plains orogeny. raw_examples = json.load(f)
Thought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I examples = [ReActExample(**example) for example in raw_examples]
need to search High Plains and find its elevation range.
Action 3: Search[High Plains] SUFFIX = """Question: {input}"""
Observation 3: High Plains refers to one of two distinct land regions
Thought 4: I need to instead search High Plains (United States).
Action 4: Search[High Plains (United States)]
Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the
High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130
m).[3]
Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer
is 1,800 to 7,000 ft.
Action 5: Finish[1,800 to 7,000 ft]""",
"""Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons"
character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after
who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated
television series The Simpsons voiced by Pamela Hayden and created by Matt
Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up
"named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose
middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is
Richard Nixon.
Action 3: Finish[Richard Nixon]""",
"""Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The
Saimaa Gesture?
Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which
documentary is about Finnish rock groups.
Action 1: Search[Adam Clayton Powell]
Observation 1 Could not find [Adam Clayton Powell]. Similar: [Adam Clayton Powell
III, Seventh Avenue (Manhattan), Adam Clayton Powell Jr. State Office
Building, Isabel Washington Powell, Adam Powell, Adam Clayton Powell
(film), Giancarlo Esposito].
Thought 2: To find the documentary, I can search Adam Clayton Powell (film).
Action 2: Search[Adam Clayton Powell (film)]
Observation 2: Adam Clayton Powell is a 1989 American documentary film directed by
Richard Kilberg. The film is about the rise and fall of influential
African-American politician Adam Clayton Powell Jr.[3][4] It was later aired
as part of the PBS series The American Experience.
Thought 3: Adam Clayton Powell (film) is a documentary about an African-American
politician, not Finnish rock groups. So the documentary about Finnish rock
groups must instead be The Saimaa Gesture.
Action 3: Finish[The Saimaa Gesture]""",
"""Question: What profession does Nicholas Ray and Elia Kazan have in common?
Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then
find the profession they have in common.
Action 1: Search[Nicholas Ray]
Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 - June 16,
1979) was an American film director, screenwriter, and actor best known for
the 1955 film Rebel Without a Cause.
Thought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need
to search Elia Kazan next and find his professions.
Action 2: Search[Elia Kazan]
Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter
and actor.
Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor.
So profession Nicholas Ray and Elia Kazan have in common is director,
screenwriter, and actor.
Action 3: Finish[director, screenwriter, actor]""",
"""Question: Which magazine was started first Arthurs Magazine or First for Women?
Thought 1: I need to search Arthurs Magazine and First for Women, and find which was
started first.
Action 1: Search[Arthurs Magazine]
Observation 1: Arthurs Magazine (1844-1846) was an American literary periodical published
in Philadelphia in the 19th century.
Thought 2: Arthurs Magazine was started in 1844. I need to search First for Women
next.
Action 2: Search[First for Women]
Observation 2: First for Women is a womans magazine published by Bauer Media Group in the
USA.[1] The magazine was started in 1989.
Thought 3: First for Women was started in 1989. 1844 (Arthurs Magazine) < 1989 (First
for Women), so Arthurs Magazine was started first.
Action 3: Finish[Arthurs Magazine]""",
"""Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?
Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work,
then find if they are the same.
Action 1: Search[Pavel Urysohn]
Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 - August 17, 1924) was a Soviet
mathematician who is best known for his contributions in dimension theory.
Thought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and
find its type of work.
Action 2: Search[Leonid Levin]
Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer
scientist.
Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn
and Leonid Levin have the same type of work.
Action 3: Finish[yes]""",
]
SUFFIX = """\n\nQuestion: {input}"""
PROMPT = Prompt.from_examples( PROMPT = Prompt.from_examples(
EXAMPLES, examples,
SUFFIX, SUFFIX,
["input"], ["input"],
) )

View File

@ -1,8 +1,9 @@
# flake8: noqa # flake8: noqa
from langchain.prompts.dynamic import
from langchain.prompts.data import BaseExample
from pathlib import Path from pathlib import Path
from langchain.prompts.data import BaseExample
from langchain.prompts.prompt import Prompt
example_path = Path(__file__).parent / "examples.json" example_path = Path(__file__).parent / "examples.json"
import json import json
@ -14,43 +15,13 @@ class SelfAskWithSearchExample(BaseExample):
def formatted(self) -> str: def formatted(self) -> str:
return f"Question: {self.question}\n{self.answer}" return f"Question: {self.question}\n{self.answer}"
with open(example_path) as f: with open(example_path) as f:
raw_examples = json.load(f) raw_examples = json.load(f)
examples = [SelfAskWithSearchExample(**example) for example in raw_examples] examples = [SelfAskWithSearchExample(**example) for example in raw_examples]
_DEFAULT_TEMPLATE = """Question: Who lived longer, Muhammad Ali or Alan Turing? PROMPT = Prompt.from_examples(
examples,
"Question: {input}",
Question: When was the founder of craigslist born? ["input"],
Are follow up questions needed here: Yes.
Follow up: Who was the founder of craigslist?
Intermediate answer: Craigslist was founded by Craig Newmark.
Follow up: When was Craig Newmark born?
Intermediate answer: Craig Newmark was born on December 6, 1952.
So the final answer is: December 6, 1952
Question: Who was the maternal grandfather of George Washington?
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washington?
Intermediate answer: The father of Mary Ball Washington was Joseph Ball.
So the final answer is: Joseph Ball
Question: Are both the directors of Jaws and Casino Royale from the same country?
Are follow up questions needed here: Yes.
Follow up: Who is the director of Jaws?
Intermediate Answer: The director of Jaws is Steven Spielberg.
Follow up: Where is Steven Spielberg from?
Intermediate Answer: The United States.
Follow up: Who is the director of Casino Royale?
Intermediate Answer: The director of Casino Royale is Martin Campbell.
Follow up: Where is Martin Campbell from?
Intermediate Answer: New Zealand.
So the final answer is: No
Question: {input}"""
PROMPT = Prompt(
input_variables=["input"],
template=_DEFAULT_TEMPLATE,
) )

View File

@ -1,16 +1,18 @@
"""Utility functions for working with prompts.""" """Utility functions for working with prompts."""
from typing import List from typing import Sequence, Union
from langchain.chains.llm import LLMChain from langchain.chains.llm import LLMChain
from langchain.llms.base import LLM from langchain.llms.base import LLM
from langchain.prompts.data import BaseExample, convert_to_examples
from langchain.prompts.dynamic import DynamicPrompt from langchain.prompts.dynamic import DynamicPrompt
TEST_GEN_TEMPLATE_SUFFIX = "Add another example." TEST_GEN_TEMPLATE_SUFFIX = "Add another example."
def generate_example(examples: List[str], llm: LLM) -> str: def generate_example(examples: Sequence[Union[str, BaseExample]], llm: LLM) -> str:
"""Return another example given a list of examples for a prompt.""" """Return another example given a list of examples for a prompt."""
prompt = DynamicPrompt(examples=examples, suffix=TEST_GEN_TEMPLATE_SUFFIX) full_examples = convert_to_examples(examples)
prompt = DynamicPrompt(examples=full_examples, suffix=TEST_GEN_TEMPLATE_SUFFIX)
chain = LLMChain(llm=llm, prompt=prompt) chain = LLMChain(llm=llm, prompt=prompt)
return chain.predict() return chain.predict()

View File

@ -3,6 +3,7 @@ from abc import ABC, abstractmethod
from typing import Any, List from typing import Any, List
from langchain.formatting import formatter from langchain.formatting import formatter
from langchain.prompts.data import BaseExample
DEFAULT_FORMATTER_MAPPING = { DEFAULT_FORMATTER_MAPPING = {
"f-string": formatter.format, "f-string": formatter.format,

View File

@ -1,5 +1,7 @@
from abc import ABC, abstractmethod
from pydantic import BaseModel from pydantic import BaseModel
from abc import abstractmethod, ABC
class BaseExample(BaseModel, ABC): class BaseExample(BaseModel, ABC):
"""Base class for examples.""" """Base class for examples."""
@ -16,3 +18,18 @@ class SimpleExample(BaseExample):
def formatted(self) -> str: def formatted(self) -> str:
return self.text return self.text
from typing import Sequence, Union
def convert_to_examples(
    examples: Sequence[Union[str, BaseExample]]
) -> Sequence[BaseExample]:
    """Normalize a mixed sequence of examples into ``BaseExample`` objects.

    Args:
        examples: Items that are either ``BaseExample`` instances or other
            values (typically plain strings) that still need wrapping.

    Returns:
        A new list in the same order as ``examples``. ``BaseExample``
        instances are passed through unchanged; every other item is wrapped
        as ``SimpleExample(text=str(item))``.
    """
    converted = []
    for item in examples:
        if not isinstance(item, BaseExample):
            # Anything that is not already an example is stringified and wrapped.
            item = SimpleExample(text=str(item))
        converted.append(item)
    return converted

View File

@ -5,7 +5,8 @@ from typing import Any, Callable, Dict, List
from pydantic import BaseModel, Extra, root_validator from pydantic import BaseModel, Extra, root_validator
from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING, BasePrompt from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING, BasePrompt
from langchain.prompts.data import BaseExample, SimpleExample from langchain.prompts.data import BaseExample, convert_to_examples
class DynamicPrompt(BaseModel, BasePrompt): class DynamicPrompt(BaseModel, BasePrompt):
r"""Schema to represent a dynamic prompt for an LLM. r"""Schema to represent a dynamic prompt for an LLM.
@ -117,7 +118,5 @@ class DynamicPrompt(BaseModel, BasePrompt):
@root_validator() @root_validator()
def convert_examples(cls, values: Dict) -> Dict: def convert_examples(cls, values: Dict) -> Dict:
examples = values["examples"] values["examples"] = convert_to_examples(values["examples"])
examples = [example if isinstance(example, BaseExample) else SimpleExample(text=str(example)) for example in examples]
values["examples"] = examples
return values return values

View File

@ -1,9 +1,10 @@
"""Prompt schema definition.""" """Prompt schema definition."""
from typing import Any, Dict, List from typing import Any, Dict, List, Sequence, Union
from pydantic import BaseModel, Extra, root_validator from pydantic import BaseModel, Extra, root_validator
from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING, BasePrompt from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING, BasePrompt
from langchain.prompts.data import BaseExample, convert_to_examples
class Prompt(BaseModel, BasePrompt): class Prompt(BaseModel, BasePrompt):
@ -70,7 +71,7 @@ class Prompt(BaseModel, BasePrompt):
@classmethod @classmethod
def from_examples( def from_examples(
cls, cls,
examples: List[str], examples: Sequence[Union[BaseExample, str]],
suffix: str, suffix: str,
input_variables: List[str], input_variables: List[str],
example_separator: str = "\n\n", example_separator: str = "\n\n",
@ -94,6 +95,7 @@ class Prompt(BaseModel, BasePrompt):
Returns: Returns:
The final prompt generated. The final prompt generated.
""" """
example_str = example_separator.join(examples) full_examples = convert_to_examples(examples)
template = prefix + example_str + suffix data = [prefix] + [example.formatted for example in full_examples] + [suffix]
template = example_separator.join(data)
return cls(input_variables=input_variables, template=template) return cls(input_variables=input_variables, template=template)

View File

@ -3,18 +3,18 @@
import pytest import pytest
from langchain.chains.llm_math.base import LLMMathChain from langchain.chains.llm_math.base import LLMMathChain
from langchain.chains.llm_math.prompt import _PROMPT_TEMPLATE from langchain.chains.llm_math.prompt import PROMPT
from tests.unit_tests.llms.fake_llm import FakeLLM from tests.unit_tests.llms.fake_llm import FakeLLM
@pytest.fixture @pytest.fixture
def fake_llm_math_chain() -> LLMMathChain: def fake_llm_math_chain() -> LLMMathChain:
"""Fake LLM Math chain for testing.""" """Fake LLM Math chain for testing."""
complex_question = _PROMPT_TEMPLATE.format(question="What is the square root of 2?") complex_question = PROMPT.format(question="What is the square root of 2?")
queries = { queries = {
_PROMPT_TEMPLATE.format(question="What is 1 plus 1?"): "Answer: 2", PROMPT.format(question="What is 1 plus 1?"): "Answer: 2",
complex_question: "```python\nprint(2**.5)\n```", complex_question: "```python\nprint(2**.5)\n```",
_PROMPT_TEMPLATE.format(question="foo"): "foo", PROMPT.format(question="foo"): "foo",
} }
fake_llm = FakeLLM(queries=queries) fake_llm = FakeLLM(queries=queries)
return LLMMathChain(llm=fake_llm, input_key="q", output_key="a") return LLMMathChain(llm=fake_llm, input_key="q", output_key="a")