Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Lance Martin <lance@langchain.dev>
Co-authored-by: Jacob Lee <jacoblee93@gmail.com>
This commit is contained in:
Erick Friis
2023-10-25 18:47:42 -07:00
committed by GitHub
parent 43257a295c
commit ebf998acb6
242 changed files with 53432 additions and 31 deletions

View File

@@ -0,0 +1,3 @@
# anthropic-iterative-search
This template is heavily inspired by [this notebook](https://github.com/anthropics/anthropic-cookbook/blob/main/long_context/wikipedia-search-cookbook.ipynb) from the Anthropic cookbook.

View File

@@ -0,0 +1,11 @@
from langchain.schema.runnable import ConfigurableField
from .retriever_agent import executor
from .chain import chain
# Public entry point of the package. `chain` is the default alternative
# (key "response"); the configurable field with id "chain" selects between
# the registered alternatives at run time.
final_chain = chain.configurable_alternatives(
ConfigurableField(id="chain"),
default_key="response",
# This adds a new option, with name `retrieve`, that is equal to `executor`
retrieve=executor,
)

View File

@@ -0,0 +1,15 @@
def _format_docs(docs):
result = "\n".join(
[
f'<item index="{i+1}">\n<page_content>\n{r}\n</page_content>\n</item>'
for i, r in enumerate(docs)
]
)
return result
def format_agent_scratchpad(intermediate_steps):
    """Rebuild the scratchpad text from prior (action, observation) steps.

    For every step, the action's ``log`` is emitted, followed by a closing
    ``</search_query>`` tag and the observation rendered by
    ``_format_docs``. No steps yields an empty string.
    """
    segments = []
    for step_action, step_result in intermediate_steps:
        segments.append(step_action.log)
        segments.append('</search_query>' + _format_docs(step_result))
    return "".join(segments)

View File

@@ -0,0 +1,15 @@
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatAnthropic
from langchain.schema.output_parser import StrOutputParser
from .prompts import answer_prompt
from .retriever_agent import executor
# Prompt that asks the model to answer from the gathered information.
prompt = ChatPromptTemplate.from_template(answer_prompt)

model = ChatAnthropic(model="claude-2", temperature=0, max_tokens_to_sample=1000)

# Answering pipeline: pass the query through unchanged, run the retrieval
# executor and keep its "output" as the information, then prompt the model
# and return its reply as a plain string.
chain = (
    {
        "query": lambda inputs: inputs["query"],
        "information": executor | (lambda result: result["output"]),
    }
    | prompt
    | model
    | StrOutputParser()
)

View File

@@ -0,0 +1,31 @@
import re
from typing import Optional

from langchain.schema.agent import AgentAction, AgentFinish

from .agent_scratchpad import _format_docs
def extract_between_tags(tag: str, string: str, strip: bool = True) -> Optional[str]:
    """Return the text enclosed by the single ``<tag>...</tag>`` pair in *string*.

    Args:
        tag: Tag name to look for. It is interpolated into the regular
            expression as-is (it is not escaped).
        string: Text to search; matches may span multiple lines.
        strip: When true, surrounding whitespace is removed from the match.

    Returns:
        The enclosed text, or ``None`` when no complete pair is present.

    Raises:
        ValueError: If more than one pair is found.
    """
    # Raw f-string so that ``\s`` reaches the regex engine instead of being
    # parsed as an (invalid) string escape sequence.
    pattern = rf"<{tag}\s?>(.+?)</{tag}\s?>"
    matches = re.findall(pattern, string, re.DOTALL)
    if not matches:
        return None
    if len(matches) != 1:
        raise ValueError(
            f"Expected exactly one <{tag}> block, found {len(matches)}"
        )
    return matches[0].strip() if strip else matches[0]
def parse_output(outputs):
    """Turn a partial model completion into the agent's next step.

    ``outputs`` must provide ``"partial_completion"`` (the model text) and
    ``"intermediate_steps"`` (prior ``(action, observation)`` pairs).

    If the completion contains a search query, an ``AgentAction`` for the
    ``"search"`` tool is returned. Otherwise an ``AgentFinish`` is returned
    whose ``"docs"`` holds every observation item seen so far and whose
    ``"output"`` is the full transcript: each step's log, a closing
    ``</search_query>`` tag and its formatted observation, followed by the
    final completion.
    """
    completion = outputs["partial_completion"]
    history = outputs["intermediate_steps"]

    # The closing tag is appended before matching, so a completion that ends
    # right after the query text (without its closing tag) still parses.
    query = extract_between_tags('search_query', completion + '</search_query>')
    if query is not None:
        return AgentAction(tool="search", tool_input=query, log=completion)

    collected_docs = []
    transcript = []
    for step_action, step_docs in history:
        collected_docs.extend(step_docs)
        transcript.append(step_action.log)
        transcript.append('</search_query>' + _format_docs(step_docs))
    transcript.append(completion)
    return AgentFinish(
        {"docs": collected_docs, "output": "".join(transcript)},
        log=completion,
    )

View File

@@ -0,0 +1,7 @@
# Prompt template for the research phase. Placeholders: {retriever_description}
# and {query}. It asks the model to plan inside <scratchpad> tags, reflect in
# <search_quality> tags after each search, and collect findings in
# <information> tags without answering the question.
retrieval_prompt = """{retriever_description} Before beginning to research the user's question, first think for a moment inside <scratchpad> tags about what information is necessary for a well-informed answer. If the user's question is complex, you may need to decompose the query into multiple subqueries and execute them individually. Sometimes the search engine will return empty search results, or the search results may not contain the information you need. In such cases, feel free to try again with a different query.
After each call to the Search Engine Tool, reflect briefly inside <search_quality></search_quality> tags about whether you now have enough information to answer, or whether more information is needed. If you have all the relevant information, write it in <information></information> tags, WITHOUT actually answering the question. Otherwise, issue a new search.
Here is the user's question: <question>{query}</question> Remind yourself to make short queries in your scratchpad as you plan out your strategy."""
# Prompt template for the answering phase. Placeholders: {query} and
# {information}.
answer_prompt = "Here is a user query: <query>{query}</query>. Here is some relevant information: <information>{information}</information>. Please answer the question using the relevant information."

View File

@@ -0,0 +1,16 @@
from langchain.retrievers import WikipediaRetriever
from langchain.tools import tool
# This is used to tell the model how to best use the retriever: it describes
# the Wikipedia search tool, how to phrase queries, and the
# <search_query>...</search_query> call syntax.
# NOTE(review): the text promises results inside <search_result> tags —
# confirm this matches how observations are actually rendered for the model.
retriever_description = """You will be asked a question by a human user. You have access to the following tool to help answer the question. <tool_description> Search Engine Tool * The search engine will exclusively search over Wikipedia for pages similar to your query. It returns for each page its title and full page content. Use this tool if you want to get up-to-date and comprehensive information on a topic to help answer queries. Queries should be as atomic as possible -- they only need to address one part of the user's question. For example, if the user's query is "what is the color of a basketball?", your search query should be "basketball". Here's another example: if the user's question is "Who created the first neural network?", your first query should be "neural network". As you can see, these queries are quite short. Think keywords, not phrases. * At any time, you can make a call to the search engine using the following syntax: <search_query>query_word</search_query>. * You'll then get results back in <search_result> tags.</tool_description>"""
# Retriever instance shared by the search tool defined below.
retriever = WikipediaRetriever()
# This should be the same as the function name below
RETRIEVER_TOOL_NAME = "search"
# Tool wrapper that exposes the module-level retriever to the agent.
# NOTE(review): langchain's @tool decorator is understood to take the tool
# name from the function name and the description from the docstring —
# confirm before renaming the function or rewording the docstring.
@tool
def search(query):
    """Search with the retriever."""
    matching_docs = retriever.get_relevant_documents(query)
    return matching_docs

View File

@@ -0,0 +1,29 @@
from langchain.chat_models import ChatAnthropic
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableMap
from langchain.schema.output_parser import StrOutputParser
from langchain.agents import AgentExecutor
from .retriever import search, RETRIEVER_TOOL_NAME, retriever_description
from .prompts import retrieval_prompt
from .agent_scratchpad import format_agent_scratchpad
from .output_parser import parse_output
# Two-message prompt: the user turn carries the retrieval instructions and the
# AI turn is pre-filled with the scratchpad of earlier steps. The retriever
# description is bound up front, so callers do not supply it.
prompt = ChatPromptTemplate.from_messages(
    [
        ("user", retrieval_prompt),
        ("ai", "{agent_scratchpad}"),
    ]
).partial(retriever_description=retriever_description)

model = ChatAnthropic(model="claude-2", temperature=0, max_tokens_to_sample=1000)

# Produces the next partial completion. Generation is stopped at
# '</search_query>' so the model halts as soon as it has issued a query.
chain = (
    RunnablePassthrough.assign(
        agent_scratchpad=lambda inputs: format_agent_scratchpad(
            inputs['intermediate_steps']
        )
    )
    | prompt
    | model.bind(stop_sequences=['</search_query>'])
    | StrOutputParser()
)

# Pairs the completion with the step history that parse_output needs, then
# converts the pair into the agent's next step.
agent_chain = (
    RunnableMap(
        {
            "partial_completion": chain,
            "intermediate_steps": lambda inputs: inputs['intermediate_steps'],
        }
    )
    | parse_output
)

executor = AgentExecutor(agent=agent_chain, tools=[search], verbose=True)

View File

@@ -0,0 +1,6 @@
from anthropic_iterative_search import final_chain
if __name__ == "__main__":
    # Demo run: select the "retrieve" alternative of the configurable chain
    # and print whatever it returns for a sample question.
    query = "Which movie came out first: Oppenheimer, or Are You There God It's Me Margaret?"
    retrieve_chain = final_chain.with_config(configurable={"chain": "retrieve"})
    print(retrieve_chain.invoke({"query": query}))

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,20 @@
[tool.poetry]
name = "anthropic_iterative_search"
version = "0.0.1"
description = ""
authors = []
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = ">=0.0.313"
anthropic = "^0.5.0"
wikipedia = "^1.4.0"
[tool.langserve]
export_module = "anthropic_iterative_search"
export_attr = "final_chain"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"