commit ebf998acb6 (parent 43257a295c)
Author: Erick Friis
Date: 2023-10-25 18:47:42 -07:00
Committed by: GitHub
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Lance Martin <lance@langchain.dev>
Co-authored-by: Jacob Lee <jacoblee93@gmail.com>

242 changed files with 53432 additions and 31 deletions

rag_elasticsearch/__init__.py

@@ -0,0 +1,3 @@
from rag_elasticsearch.chain import chain

__all__ = ["chain"]
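The package root re-exports the chain as its public entry point, so a serving layer (for example a LangServe app) can import it directly. A minimal sketch:

# Import the template's chain from the package root
from rag_elasticsearch import chain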

rag_elasticsearch/chain.py

@@ -0,0 +1,56 @@
from operator import itemgetter
from typing import List, Tuple

from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import format_document
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableMap, RunnablePassthrough
from langchain.vectorstores.elasticsearch import ElasticsearchStore

from .connection import es_connection_details
from .prompts import CONDENSE_QUESTION_PROMPT, DOCUMENT_PROMPT, LLM_CONTEXT_PROMPT

# Set up the connection to Elasticsearch
vectorstore = ElasticsearchStore(
    **es_connection_details,
    embedding=HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}
    ),
    index_name="workplace-search-example",
)

retriever = vectorstore.as_retriever()

# Set up the LLM to use
llm = ChatOpenAI(temperature=0)


def _combine_documents(
    docs, document_prompt=DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Format each retrieved document and join them into one context string."""
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)


def _format_chat_history(chat_history: List[Tuple[str, str]]) -> str:
    """Render (human, ai) dialogue turns as a plain-text transcript."""
    buffer = ""
    for dialogue_turn in chat_history:
        human = "Human: " + dialogue_turn[0]
        ai = "Assistant: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer


# Condense the follow-up question and chat history into a standalone question
_inputs = RunnableMap(
    standalone_question=RunnablePassthrough.assign(
        chat_history=lambda x: _format_chat_history(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser(),
)

# Retrieve context for the standalone question and expose both to the final prompt
_context = {
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    "question": lambda x: x["standalone_question"],
}

chain = _inputs | _context | LLM_CONTEXT_PROMPT | llm | StrOutputParser()
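For illustration, the assembled chain takes a question plus a list of (human, ai) chat turns. A minimal sketch, assuming the Elasticsearch index is populated and OPENAI_API_KEY is set; the question and history values below are invented:

# Illustrative invocation only; assumes a reachable, populated Elasticsearch
# index and an OPENAI_API_KEY in the environment.
from rag_elasticsearch import chain

answer = chain.invoke(
    {
        "question": "Does that policy cover part-time employees?",
        "chat_history": [
            ("What is the vacation policy?", "Employees get 20 paid days off."),
        ],
    }
)
print(answer)  # a plain string, since the chain ends with StrOutputParser()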

rag_elasticsearch/connection.py

@@ -0,0 +1,15 @@
import os

ELASTIC_CLOUD_ID = os.getenv("ELASTIC_CLOUD_ID")
ELASTIC_USERNAME = os.getenv("ELASTIC_USERNAME", "elastic")
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD")
ES_URL = os.getenv("ES_URL", "http://localhost:9200")

# Use Elastic Cloud credentials when all are present; otherwise fall back to a URL
if ELASTIC_CLOUD_ID and ELASTIC_USERNAME and ELASTIC_PASSWORD:
    es_connection_details = {
        "es_cloud_id": ELASTIC_CLOUD_ID,
        "es_user": ELASTIC_USERNAME,
        "es_password": ELASTIC_PASSWORD,
    }
else:
    es_connection_details = {"es_url": ES_URL}

rag_elasticsearch/prompts.py

@@ -0,0 +1,39 @@
from langchain.prompts import ChatPromptTemplate, PromptTemplate

# Used to condense a question and chat history into a single standalone question
condense_question_prompt_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language. If there is no chat history, just rephrase the question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
"""

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(
    condense_question_prompt_template
)

# RAG prompt that provides the context and question for the LLM to answer.
# It also asks the LLM to cite the sources of the passages it answers from.
llm_context_prompt_template = """
Use the following passages to answer the user's question.
Each passage has a SOURCE which is the title of the document. When answering, cite the source names of the passages you are answering from in a bullet point list below the answer.

If you don't know the answer, just say that you don't know; don't try to make up an answer.

----
{context}
----
Question: {question}
"""

LLM_CONTEXT_PROMPT = ChatPromptTemplate.from_template(llm_context_prompt_template)

# Used to build a context window from the retrieved passages
document_prompt_template = """
---
NAME: {name}
PASSAGE:
{page_content}
---
"""

DOCUMENT_PROMPT = PromptTemplate.from_template(document_prompt_template)
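To see what DOCUMENT_PROMPT contributes to the context window, here is a small sketch; the document content and its "name" metadata field are invented for illustration:

from langchain.schema import Document, format_document

from rag_elasticsearch.prompts import DOCUMENT_PROMPT

# Invented sample document; real ones come from the Elasticsearch retriever
# and must carry a "name" metadata field for the template to fill in.
doc = Document(
    page_content="Employees accrue 1.7 vacation days per month.",
    metadata={"name": "Vacation Policy"},
)

# Prints the passage wrapped in the ---/NAME/PASSAGE markers defined above
print(format_document(doc, DOCUMENT_PROMPT))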