diff --git a/langchain/chains/llm_math/examples.json b/langchain/chains/llm_math/examples.json new file mode 100644 index 00000000000..f87b225225b --- /dev/null +++ b/langchain/chains/llm_math/examples.json @@ -0,0 +1,6 @@ +[ + { + "question": "What is 37593 * 67?", + "answer": "```python\nprint(37593 * 67)\n```\n```output\n2518731\n```\nAnswer: 2518731" + } +] \ No newline at end of file diff --git a/langchain/chains/llm_math/prompt.py b/langchain/chains/llm_math/prompt.py index b389e91737b..1cab5948b3f 100644 --- a/langchain/chains/llm_math/prompt.py +++ b/langchain/chains/llm_math/prompt.py @@ -1,7 +1,7 @@ # flake8: noqa from langchain.prompts.prompt import Prompt -_PROMPT_TEMPLATE = """You are GPT-3, and you can't do math. +_PREFIX = """You are GPT-3, and you can't do math. You can do basic math, and your memorization abilities are impressive, but you can't do any complex calculations that a human could not do in their head. You also have an annoying tendency to just make up highly specific, but wrong, answers. @@ -21,18 +21,28 @@ Otherwise, use this simpler format: Question: ${{Question without hard calculation}} Answer: ${{Answer}} -Begin. +Begin.""" -Question: What is 37593 * 67? 
+from pathlib import Path -```python -print(37593 * 67) -``` -```output -2518731 -``` -Answer: 2518731 +from langchain.prompts.data import BaseExample -Question: {question}""" +example_path = Path(__file__).parent / "examples.json" +import json -PROMPT = Prompt(input_variables=["question"], template=_PROMPT_TEMPLATE) + +class LLMMathExample(BaseExample): + question: str + answer: str + + def formatted(self) -> str: + return f"Question: {self.question}\n\n{self.answer}" + + +with open(example_path) as f: + raw_examples = json.load(f) + examples = [LLMMathExample(**example) for example in raw_examples] + +PROMPT = Prompt.from_examples( + examples, "Question: {question}", ["question"], prefix=_PREFIX +) diff --git a/langchain/chains/react/examples.json b/langchain/chains/react/examples.json new file mode 100644 index 00000000000..6228eb82e22 --- /dev/null +++ b/langchain/chains/react/examples.json @@ -0,0 +1,26 @@ +[ + { + "question": "What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?", + "answer": "Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of that area.\nAction 1: Search[Colorado orogeny]\nObservation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\nThought 2: It does not mention the eastern sector. So I need to look up eastern sector.\nAction 2: Lookup[eastern sector]\nObservation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\nThought 3: The eastern sector of Colorado orogeny extends into the High Plains. 
So I need to search High Plains and find its elevation range.\nAction 3: Search[High Plains]\nObservation 3: High Plains refers to one of two distinct land regions\nThought 4: I need to instead search High Plains (United States).\nAction 4: Search[High Plains (United States)]\nObservation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\nThought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\nAction 5: Finish[1,800 to 7,000 ft]" + }, + { + "question": "Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?", + "answer": "Thought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.\nAction 1: Search[Milhouse]\nObservation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\nThought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\".\nAction 2: Lookup[named after]\nObservation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.\nThought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\nAction 3: Finish[Richard Nixon]" + }, + { + "question": "Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?", + "answer": "Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\nAction 1: Search[Adam Clayton Powell]\nObservation 1 Could not find [Adam Clayton Powell]. Similar: [’Adam Clayton Powell III’, ’Seventh Avenue (Manhattan)’, ’Adam Clayton Powell Jr. 
State Office Building’, ’Isabel Washington Powell’, ’Adam Powell’, ’Adam Clayton Powell (film)’, ’Giancarlo Esposito’].\nThought 2: To find the documentary, I can search Adam Clayton Powell (film).\nAction 2: Search[Adam Clayton Powell (film)]\nObservation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg. The film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\nThought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\nAction 3: Finish[The Saimaa Gesture]" + }, + { + "question": "What profession does Nicholas Ray and Elia Kazan have in common?", + "answer": "Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\nAction 1: Search[Nicholas Ray]\nObservation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 - June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\nThought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.\nAction 2: Search[Elia Kazan]\nObservation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\nThought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. 
So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\nAction 3: Finish[director, screenwriter, actor]" + }, + { + "question": "Which magazine was started first Arthur’s Magazine or First for Women?", + "answer": "Thought 1: I need to search Arthur’s Magazine and First for Women, and find which was started first.\nAction 1: Search[Arthur’s Magazine]\nObservation 1: Arthur’s Magazine (1844-1846) was an American literary periodical published in Philadelphia in the 19th century.\nThought 2: Arthur’s Magazine was started in 1844. I need to search First for Women next.\nAction 2: Search[First for Women]\nObservation 2: First for Women is a woman’s magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989.\nThought 3: First for Women was started in 1989. 1844 (Arthur’s Magazine) < 1989 (First for Women), so Arthur’s Magazine was started first.\nAction 3: Finish[Arthur’s Magazine]" + }, + { + "question": "Were Pavel Urysohn and Leonid Levin known for the same type of work?", + "answer": "Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\nAction 1: Search[Pavel Urysohn]\nObservation 1: Pavel Samuilovich Urysohn (February 3, 1898 - August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\nThought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and find its type of work.\nAction 2: Search[Leonid Levin]\nObservation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist.\nThought 3: Leonid Levin is a mathematician and computer scientist. 
So Pavel Urysohn and Leonid Levin have the same type of work.\nAction 3: Finish[yes]" + } +] \ No newline at end of file diff --git a/langchain/chains/react/prompt.py b/langchain/chains/react/prompt.py index e0e16299f86..6f8b39fda60 100644 --- a/langchain/chains/react/prompt.py +++ b/langchain/chains/react/prompt.py @@ -1,116 +1,29 @@ # flake8: noqa +from pathlib import Path + +from langchain.prompts.data import BaseExample from langchain.prompts.prompt import Prompt -EXAMPLES = [ - """Question: What is the elevation range for the area that the eastern sector of the -Colorado orogeny extends into? -Thought 1: I need to search Colorado orogeny, find the area that the eastern sector -of the Colorado orogeny extends into, then find the elevation range of the -area. -Action 1: Search[Colorado orogeny] -Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in -Colorado and surrounding areas. -Thought 2: It does not mention the eastern sector. So I need to look up eastern -sector. -Action 2: Lookup[eastern sector] -Observation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called -the Central Plains orogeny. -Thought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I -need to search High Plains and find its elevation range. -Action 3: Search[High Plains] -Observation 3: High Plains refers to one of two distinct land regions -Thought 4: I need to instead search High Plains (United States). -Action 4: Search[High Plains (United States)] -Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the -High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 -m).[3] -Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer -is 1,800 to 7,000 ft. 
-Action 5: Finish[1,800 to 7,000 ft]""", - """Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" -character Milhouse, who Matt Groening named after who? -Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after -who. I only need to search Milhouse and find who it is named after. -Action 1: Search[Milhouse] -Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated -television series The Simpsons voiced by Pamela Hayden and created by Matt -Groening. -Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up -"named after". -Action 2: Lookup[named after] -Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose -middle name was Milhous. -Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is -Richard Nixon. -Action 3: Finish[Richard Nixon]""", - """Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The -Saimaa Gesture? -Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which -documentary is about Finnish rock groups. -Action 1: Search[Adam Clayton Powell] -Observation 1 Could not find [Adam Clayton Powell]. Similar: [’Adam Clayton Powell -III’, ’Seventh Avenue (Manhattan)’, ’Adam Clayton Powell Jr. State Office -Building’, ’Isabel Washington Powell’, ’Adam Powell’, ’Adam Clayton Powell -(film)’, ’Giancarlo Esposito’]. -Thought 2: To find the documentary, I can search Adam Clayton Powell (film). -Action 2: Search[Adam Clayton Powell (film)] -Observation 2: Adam Clayton Powell is a 1989 American documentary film directed by -Richard Kilberg. The film is about the rise and fall of influential -African-American politician Adam Clayton Powell Jr.[3][4] It was later aired -as part of the PBS series The American Experience. 
-Thought 3: Adam Clayton Powell (film) is a documentary about an African-American -politician, not Finnish rock groups. So the documentary about Finnish rock -groups must instead be The Saimaa Gesture. -Action 3: Finish[The Saimaa Gesture]""", - """Question: What profession does Nicholas Ray and Elia Kazan have in common? -Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then -find the profession they have in common. -Action 1: Search[Nicholas Ray] -Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 - June 16, -1979) was an American film director, screenwriter, and actor best known for -the 1955 film Rebel Without a Cause. -Thought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need -to search Elia Kazan next and find his professions. -Action 2: Search[Elia Kazan] -Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter -and actor. -Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. -So profession Nicholas Ray and Elia Kazan have in common is director, -screenwriter, and actor. -Action 3: Finish[director, screenwriter, actor]""", - """Question: Which magazine was started first Arthur’s Magazine or First for Women? -Thought 1: I need to search Arthur’s Magazine and First for Women, and find which was -started first. -Action 1: Search[Arthur’s Magazine] -Observation 1: Arthur’s Magazine (1844-1846) was an American literary periodical published -in Philadelphia in the 19th century. -Thought 2: Arthur’s Magazine was started in 1844. I need to search First for Women -next. -Action 2: Search[First for Women] -Observation 2: First for Women is a woman’s magazine published by Bauer Media Group in the -USA.[1] The magazine was started in 1989. -Thought 3: First for Women was started in 1989. 1844 (Arthur’s Magazine) < 1989 (First -for Women), so Arthur’s Magazine was started first. 
-Action 3: Finish[Arthur’s Magazine]""", - """Question: Were Pavel Urysohn and Leonid Levin known for the same type of work? -Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, -then find if they are the same. -Action 1: Search[Pavel Urysohn] -Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 - August 17, 1924) was a Soviet -mathematician who is best known for his contributions in dimension theory. -Thought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and -find its type of work. -Action 2: Search[Leonid Levin] -Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer -scientist. -Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn -and Leonid Levin have the same type of work. -Action 3: Finish[yes]""", -] -SUFFIX = """\n\nQuestion: {input}""" +example_path = Path(__file__).parent / "examples.json" +import json + + +class ReActExample(BaseExample): + question: str + answer: str + + def formatted(self) -> str: + return f"Question: {self.question}\n{self.answer}" + + +with open(example_path) as f: + raw_examples = json.load(f) + examples = [ReActExample(**example) for example in raw_examples] + +SUFFIX = """Question: {input}""" PROMPT = Prompt.from_examples( - EXAMPLES, + examples, SUFFIX, ["input"], ) diff --git a/langchain/chains/self_ask_with_search/prompt.py b/langchain/chains/self_ask_with_search/prompt.py index 3208a5cbef9..1301c32c80f 100644 --- a/langchain/chains/self_ask_with_search/prompt.py +++ b/langchain/chains/self_ask_with_search/prompt.py @@ -1,8 +1,9 @@ # flake8: noqa -from langchain.prompts.dynamic import -from langchain.prompts.data import BaseExample from pathlib import Path +from langchain.prompts.data import BaseExample +from langchain.prompts.prompt import Prompt + example_path = Path(__file__).parent / "examples.json" import json @@ -14,43 +15,13 @@ class SelfAskWithSearchExample(BaseExample): def 
formatted(self) -> str: return f"Question: {self.question}\n{self.answer}" + with open(example_path) as f: raw_examples = json.load(f) examples = [SelfAskWithSearchExample(**example) for example in raw_examples] -_DEFAULT_TEMPLATE = """Question: Who lived longer, Muhammad Ali or Alan Turing? - - -Question: When was the founder of craigslist born? -Are follow up questions needed here: Yes. -Follow up: Who was the founder of craigslist? -Intermediate answer: Craigslist was founded by Craig Newmark. -Follow up: When was Craig Newmark born? -Intermediate answer: Craig Newmark was born on December 6, 1952. -So the final answer is: December 6, 1952 - -Question: Who was the maternal grandfather of George Washington? -Are follow up questions needed here: Yes. -Follow up: Who was the mother of George Washington? -Intermediate answer: The mother of George Washington was Mary Ball Washington. -Follow up: Who was the father of Mary Ball Washington? -Intermediate answer: The father of Mary Ball Washington was Joseph Ball. -So the final answer is: Joseph Ball - -Question: Are both the directors of Jaws and Casino Royale from the same country? -Are follow up questions needed here: Yes. -Follow up: Who is the director of Jaws? -Intermediate Answer: The director of Jaws is Steven Spielberg. -Follow up: Where is Steven Spielberg from? -Intermediate Answer: The United States. -Follow up: Who is the director of Casino Royale? -Intermediate Answer: The director of Casino Royale is Martin Campbell. -Follow up: Where is Martin Campbell from? -Intermediate Answer: New Zealand. 
-So the final answer is: No - -Question: {input}""" -PROMPT = Prompt( - input_variables=["input"], - template=_DEFAULT_TEMPLATE, +PROMPT = Prompt.from_examples( + examples, + "Question: {input}", + ["input"], ) diff --git a/langchain/example_generator.py b/langchain/example_generator.py index 818a848a710..87167b82705 100644 --- a/langchain/example_generator.py +++ b/langchain/example_generator.py @@ -1,16 +1,18 @@ """Utility functions for working with prompts.""" -from typing import List +from typing import Sequence, Union from langchain.chains.llm import LLMChain from langchain.llms.base import LLM +from langchain.prompts.data import BaseExample, convert_to_examples from langchain.prompts.dynamic import DynamicPrompt TEST_GEN_TEMPLATE_SUFFIX = "Add another example." -def generate_example(examples: List[str], llm: LLM) -> str: +def generate_example(examples: Sequence[Union[str, BaseExample]], llm: LLM) -> str: """Return another example given a list of examples for a prompt.""" - prompt = DynamicPrompt(examples=examples, suffix=TEST_GEN_TEMPLATE_SUFFIX) + full_examples = convert_to_examples(examples) + prompt = DynamicPrompt(examples=full_examples, suffix=TEST_GEN_TEMPLATE_SUFFIX) chain = LLMChain(llm=llm, prompt=prompt) return chain.predict() diff --git a/langchain/prompts/base.py b/langchain/prompts/base.py index d99d940053d..3902a907064 100644 --- a/langchain/prompts/base.py +++ b/langchain/prompts/base.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod from typing import Any, List from langchain.formatting import formatter +from langchain.prompts.data import BaseExample DEFAULT_FORMATTER_MAPPING = { "f-string": formatter.format, diff --git a/langchain/prompts/data.py b/langchain/prompts/data.py index ddddd8b903c..50348c09298 100644 --- a/langchain/prompts/data.py +++ b/langchain/prompts/data.py @@ -1,5 +1,7 @@ +from abc import ABC, abstractmethod + from pydantic import BaseModel -from abc import abstractmethod, ABC + class BaseExample(BaseModel, ABC): 
"""Base class for examples.""" @@ -15,4 +17,19 @@ class SimpleExample(BaseExample): text: str def formatted(self) -> str: - return self.text \ No newline at end of file + return self.text + + +from typing import Sequence, Union + + +def convert_to_examples( + examples: Sequence[Union[str, BaseExample]] +) -> Sequence[BaseExample]: + new_examples = [ + example + if isinstance(example, BaseExample) + else SimpleExample(text=str(example)) + for example in examples + ] + return new_examples diff --git a/langchain/prompts/dynamic.py b/langchain/prompts/dynamic.py index af92302a7a1..c58c86e2187 100644 --- a/langchain/prompts/dynamic.py +++ b/langchain/prompts/dynamic.py @@ -5,7 +5,8 @@ from typing import Any, Callable, Dict, List from pydantic import BaseModel, Extra, root_validator from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING, BasePrompt -from langchain.prompts.data import BaseExample, SimpleExample +from langchain.prompts.data import BaseExample, convert_to_examples + class DynamicPrompt(BaseModel, BasePrompt): r"""Schema to represent a dynamic prompt for an LLM. 
@@ -117,7 +118,5 @@ class DynamicPrompt(BaseModel, BasePrompt):
 
     @root_validator()
     def convert_examples(cls, values: Dict) -> Dict:
-        examples = values["examples"]
-        examples = [example if isinstance(example, BaseExample) else SimpleExample(text=str(example)) for example in examples]
-        values["examples"] = examples
+        values["examples"] = convert_to_examples(values["examples"])
         return values
diff --git a/langchain/prompts/prompt.py b/langchain/prompts/prompt.py
index 02f87b77003..2d120e94aca 100644
--- a/langchain/prompts/prompt.py
+++ b/langchain/prompts/prompt.py
@@ -1,9 +1,10 @@
 """Prompt schema definition."""
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Sequence, Union
 
 from pydantic import BaseModel, Extra, root_validator
 
 from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING, BasePrompt
+from langchain.prompts.data import BaseExample, convert_to_examples
 
 
 class Prompt(BaseModel, BasePrompt):
@@ -70,7 +71,7 @@ class Prompt(BaseModel, BasePrompt):
     @classmethod
     def from_examples(
         cls,
-        examples: List[str],
+        examples: Sequence[Union[BaseExample, str]],
         suffix: str,
         input_variables: List[str],
         example_separator: str = "\n\n",
@@ -94,6 +95,8 @@ class Prompt(BaseModel, BasePrompt):
         Returns:
             The final prompt generated.
         """
-        example_str = example_separator.join(examples)
-        template = prefix + example_str + suffix
+        full_examples = convert_to_examples(examples)
+        # formatted() is a method, not a property: call it so join() gets strings.
+        data = [prefix] + [example.formatted() for example in full_examples] + [suffix]
+        template = example_separator.join(data)
         return cls(input_variables=input_variables, template=template)
diff --git a/tests/unit_tests/chains/test_llm_math.py b/tests/unit_tests/chains/test_llm_math.py
index b38d89dd2a0..1cf2ef5f2db 100644
--- a/tests/unit_tests/chains/test_llm_math.py
+++ b/tests/unit_tests/chains/test_llm_math.py
@@ -3,18 +3,18 @@
 import pytest
 
 from langchain.chains.llm_math.base import LLMMathChain
-from langchain.chains.llm_math.prompt import _PROMPT_TEMPLATE
+from langchain.chains.llm_math.prompt import PROMPT
 from tests.unit_tests.llms.fake_llm import FakeLLM
 
 
 @pytest.fixture
 def fake_llm_math_chain() -> LLMMathChain:
     """Fake LLM Math chain for testing."""
-    complex_question = _PROMPT_TEMPLATE.format(question="What is the square root of 2?")
+    complex_question = PROMPT.format(question="What is the square root of 2?")
     queries = {
-        _PROMPT_TEMPLATE.format(question="What is 1 plus 1?"): "Answer: 2",
+        PROMPT.format(question="What is 1 plus 1?"): "Answer: 2",
         complex_question: "```python\nprint(2**.5)\n```",
-        _PROMPT_TEMPLATE.format(question="foo"): "foo",
+        PROMPT.format(question="foo"): "foo",
     }
     fake_llm = FakeLLM(queries=queries)
     return LLMMathChain(llm=fake_llm, input_key="q", output_key="a")