commit ebf998acb6
parent 43257a295c
Author: Erick Friis
Date: 2023-10-25 18:47:42 -07:00
Committed-by: GitHub
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Lance Martin <lance@langchain.dev>
Co-authored-by: Jacob Lee <jacoblee93@gmail.com>

242 changed files with 53432 additions and 31 deletions

@@ -0,0 +1,92 @@
from operator import itemgetter
from typing import List, Tuple

from pydantic import BaseModel

from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.prompts.prompt import PromptTemplate
from langchain.schema import AIMessage, HumanMessage, format_document
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import (
    RunnableBranch,
    RunnableLambda,
    RunnableMap,
    RunnablePassthrough,
)
from langchain.vectorstores import Pinecone
### Ingest code - you may need to run this the first time
# Load
# from langchain.document_loaders import WebBaseLoader
# loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
# data = loader.load()
# # Split
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
# all_splits = text_splitter.split_documents(data)
#
# # Add to vectorDB
# vectorstore = Pinecone.from_documents(
# documents=all_splits, embedding=OpenAIEmbeddings(), index_name='langchain-test'
# )
# retriever = vectorstore.as_retriever()
vectorstore = Pinecone.from_existing_index("langchain-test", OpenAIEmbeddings())
retriever = vectorstore.as_retriever()
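# NOTE (assumption, not stated in the original file): Pinecone.from_existing_index
# expects the Pinecone client to already be configured, typically via the
# PINECONE_API_KEY and PINECONE_ENVIRONMENT environment variables, and
# OpenAIEmbeddings reads OPENAI_API_KEY from the environment.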
# Condense a chat history and follow-up question into a standalone question
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
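# Illustrative example (hypothetical inputs and output, not executed here):
# given chat_history "Human: What is an agent?\nAI: An LLM-driven system..."
# and the follow-up "How does it store memory?", the condensing model should
# emit a standalone question like "How does an LLM agent store memory?".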
# RAG answer synthesis prompt
template = """Answer the question based only on the following context:
<context>
{context}
</context>"""
ANSWER_PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", template),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{question}"),
    ]
)
# Conversational Retrieval Chain
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)


def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer
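# For example, _format_chat_history([("Hi", "Hello!")]) returns
# [HumanMessage(content="Hi"), AIMessage(content="Hello!")].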
# User input
class ChatHistory(BaseModel):
    chat_history: List[Tuple[str, str]]
    question: str
_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),
        # Condense follow-up question and chat into a standalone question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(itemgetter("question")),
)
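# Illustrative behavior (assumed, not executed here):
# _search_query.invoke({"question": "What is it?", "chat_history": []})
#   -> "What is it?"  (no history, so the question passes through unchanged)
# With a non-empty chat_history, the condensing branch runs instead and
# returns the LLM-rewritten standalone question as a string.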
_inputs = RunnableMap(
    {
        "question": lambda x: x["question"],
        "chat_history": lambda x: _format_chat_history(x["chat_history"]),
        "context": _search_query | retriever | _combine_documents,
    }
).with_types(input_type=ChatHistory)

chain = _inputs | ANSWER_PROMPT | ChatOpenAI() | StrOutputParser()
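
# A minimal usage sketch (an addition for illustration, not part of the committed
# template). `.invoke` is the standard Runnable entry point, and the input must
# match the ChatHistory schema above; the question/answer strings are made up.
if __name__ == "__main__":
    # First turn: no history, so the question is used as the search query directly.
    print(chain.invoke({"question": "What is task decomposition?", "chat_history": []}))

    # Second turn: prior exchanges are condensed into a standalone search query.
    history = [("What is task decomposition?", "It breaks a big task into steps.")]
    print(chain.invoke({"question": "Why is it useful for agents?", "chat_history": history}))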