WIP: Harrison/base retriever (#1765)

This commit is contained in:
Harrison Chase
2023-03-24 07:46:49 -07:00
committed by GitHub
parent 4f364db9a9
commit 47d37db2d2
25 changed files with 509 additions and 339 deletions

View File

@@ -83,7 +83,7 @@ In addition, we also have some more generic resources for evaluation.
`Question Answering <./evaluation/question_answering.html>`_: An overview of LLMs aimed at evaluating question answering systems in general.
`Data Augmented Question Answering <./evaluation/data_augmented_question_answering.html>`_: An end-to-end example of evaluating a question answering system focused on a specific document (a VectorDBQAChain to be precise). This example highlights how to use LLMs to come up with question/answer examples to evaluate over, and then highlights how to use LLMs to evaluate performance on those generated examples.
`Data Augmented Question Answering <./evaluation/data_augmented_question_answering.html>`_: An end-to-end example of evaluating a question answering system focused on a specific document (a RetrievalQAChain to be precise). This example highlights how to use LLMs to come up with question/answer examples to evaluate over, and then highlights how to use LLMs to evaluate performance on those generated examples.
`Hugging Face Datasets <./evaluation/huggingface_datasets.html>`_: Covers an example of loading and using a dataset from Hugging Face for evaluation.

View File

@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 1,
"id": "7b57a50f",
"metadata": {},
"outputs": [],
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "5b2d5e98",
"metadata": {},
"outputs": [
@@ -49,7 +49,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a7abbc20615d4c58b75a055a790d7212",
"model_id": "4c389519842e4b65afc33006a531dcbc",
"version_major": 2,
"version_minor": 0
},
@@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 3,
"id": "61375342",
"metadata": {},
"outputs": [
@@ -81,7 +81,7 @@
" {'tool': None, 'tool_input': 'What is the purpose of the NATO Alliance?'}]}"
]
},
"execution_count": 16,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -92,7 +92,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 4,
"id": "02500304",
"metadata": {},
"outputs": [
@@ -105,7 +105,7 @@
" {'tool': None, 'tool_input': 'What is the purpose of YC?'}]}"
]
},
"execution_count": 22,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"id": "c18680b5",
"metadata": {},
"outputs": [],
@@ -136,7 +136,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"id": "7f0de2b3",
"metadata": {},
"outputs": [],
@@ -146,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"id": "ef84ff99",
"metadata": {},
"outputs": [
@@ -173,23 +173,23 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 8,
"id": "8843cb0c",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import VectorDBQA\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.llms import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 9,
"id": "573719a0",
"metadata": {},
"outputs": [],
"source": [
"chain_sota = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0), chain_type=\"stuff\", vectorstore=vectorstore_sota, input_key=\"question\")"
"chain_sota = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type=\"stuff\", retriever=vectorstore_sota.as_retriever(), input_key=\"question\")\n"
]
},
{
@@ -202,7 +202,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"id": "c2dbb014",
"metadata": {},
"outputs": [],
@@ -212,7 +212,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"id": "98d16f08",
"metadata": {},
"outputs": [
@@ -231,12 +231,12 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 12,
"id": "ec0aab02",
"metadata": {},
"outputs": [],
"source": [
"chain_pg = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0), chain_type=\"stuff\", vectorstore=vectorstore_pg, input_key=\"question\")\n"
"chain_pg = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type=\"stuff\", retriever=vectorstore_pg.as_retriever(), input_key=\"question\")\n"
]
},
{
@@ -249,7 +249,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 13,
"id": "ade1aafa",
"metadata": {},
"outputs": [],
@@ -271,7 +271,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 14,
"id": "104853f8",
"metadata": {},
"outputs": [],
@@ -291,7 +291,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 15,
"id": "4664e79f",
"metadata": {},
"outputs": [
@@ -301,7 +301,7 @@
"'The purpose of the NATO Alliance is to promote peace and security in the North Atlantic region by providing a collective defense against potential threats.'"
]
},
"execution_count": 48,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -321,8 +321,8 @@
},
{
"cell_type": "code",
"execution_count": 35,
"id": "24b4c66e",
"execution_count": null,
"id": "799f6c17",
"metadata": {},
"outputs": [],
"source": [
@@ -349,23 +349,10 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": null,
"id": "1d583f03",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'input': 'What is the purpose of the NATO Alliance?',\n",
" 'answer': 'The purpose of the NATO Alliance is to secure peace and stability in Europe after World War 2.',\n",
" 'output': 'The purpose of the NATO Alliance is to promote peace and security in the North Atlantic region by providing a collective defense against potential threats.'}"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"predictions[0]"
]
@@ -380,7 +367,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": null,
"id": "d0a9341d",
"metadata": {},
"outputs": [],

View File

@@ -23,7 +23,8 @@
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain import OpenAI, VectorDBQA"
"from langchain.llms import OpenAI\n",
"from langchain.chains import RetrievalQA"
]
},
{
@@ -50,7 +51,7 @@
"\n",
"embeddings = OpenAIEmbeddings()\n",
"docsearch = Chroma.from_documents(texts, embeddings)\n",
"qa = VectorDBQA.from_llm(llm=OpenAI(), vectorstore=docsearch)"
"qa = RetrievalQA.from_llm(llm=OpenAI(), retriever=docsearch.as_retriever())"
]
},
{
@@ -434,7 +435,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 1,
"id": "3bd13ab7",
"metadata": {},
"outputs": [],
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "5b2d5e98",
"metadata": {},
"outputs": [
@@ -49,7 +49,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "63f434a42cba4739919333c75324acc9",
"model_id": "9264acfe710b4faabf060f0fcf4f7308",
"version_major": 2,
"version_minor": 0
},
@@ -77,7 +77,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "c18680b5",
"metadata": {},
"outputs": [],
@@ -88,7 +88,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "7f0de2b3",
"metadata": {},
"outputs": [],
@@ -98,7 +98,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "ef84ff99",
"metadata": {},
"outputs": [
@@ -125,23 +125,23 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "8843cb0c",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import VectorDBQA\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.llms import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "573719a0",
"metadata": {},
"outputs": [],
"source": [
"chain = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", vectorstore=vectorstore, input_key=\"question\")"
"chain = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", retriever=vectorstore.as_retriever(), input_key=\"question\")"
]
},
{

View File

@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 1,
"id": "f127fb04",
"metadata": {},
"outputs": [],
@@ -35,73 +35,17 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "5b2d5e98",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5d66c27b9b4744989843142f08f5c1b4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/21.0 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading and preparing dataset json/LangChainDatasets--question-answering-state-of-the-union to /Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--question-answering-state-of-the-union-a7e5a3b2db4f440d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n"
"Found cached dataset json (/Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--question-answering-state-of-the-union-a7e5a3b2db4f440d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9e21e2ab96a0491ea5e252720d7dfa26",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c883830e068c42d39da8406ab38574c4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/2.90k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3b085715e52e49948d2a59d27e004eba",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
@@ -109,27 +53,6 @@
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0 examples [00:00, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset json downloaded and prepared to /Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--question-answering-state-of-the-union-a7e5a3b2db4f440d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ee900d35e27d4843b42b31811b43212b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
@@ -207,7 +130,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import VectorDBQA\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.llms import OpenAI"
]
},
@@ -218,7 +141,7 @@
"metadata": {},
"outputs": [],
"source": [
"chain = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", vectorstore=vectorstore, input_key=\"question\")"
"chain = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", retriever=vectorstore.as_retriever(), input_key=\"question\")"
]
},
{