vector db qa (#71)

commit 47af2bcee4
parent 4c0b684f79
examples/vector_db_qa.ipynb (new file, 94 lines)

@@ -0,0 +1,94 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "82525493",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
    "from langchain.vectorstores.faiss import FAISS\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain import OpenAI, VectorDBQA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5c7049db",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('state_of_the_union.txt') as f:\n",
    "    state_of_the_union = f.read()\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "texts = text_splitter.split_text(state_of_the_union)\n",
    "\n",
    "embeddings = OpenAIEmbeddings()\n",
    "docsearch = FAISS.from_texts(texts, embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3018f865",
   "metadata": {},
   "outputs": [],
   "source": [
    "qa = VectorDBQA(llm=OpenAI(), vectorstore=docsearch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "032a47f8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\" The president said that Ketanji Brown Jackson is one of our nation's top legal minds, who will continue Justice Breyer’s legacy of excellence.\""
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "qa.run(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0f20b92",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
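The notebook above exercises the new chain end to end. For reference, the same flow as a plain script (a minimal sketch: it assumes state_of_the_union.txt is on disk and an OpenAI API key is configured in the environment):

# Build a FAISS index over the speech and query it with the new VectorDBQA chain.
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain import OpenAI, VectorDBQA

with open("state_of_the_union.txt") as f:
    state_of_the_union = f.read()

# Split the document into ~1000-character chunks with no overlap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(state_of_the_union)

# Embed each chunk and index the vectors for similarity search.
docsearch = FAISS.from_texts(texts, OpenAIEmbeddings())

# Wire the vector store and an LLM into the chain, then query it.
qa = VectorDBQA(llm=OpenAI(), vectorstore=docsearch)
print(qa.run("What did the president say about Ketanji Brown Jackson"))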
langchain/__init__.py

@@ -14,6 +14,7 @@ from langchain.chains import (
    SelfAskWithSearchChain,
    SerpAPIChain,
    SQLDatabaseChain,
    VectorDBQA,
)
from langchain.docstore import Wikipedia
from langchain.llms import Cohere, HuggingFaceHub, OpenAI
@@ -39,5 +40,6 @@ __all__ = [
    "SQLDatabaseChain",
    "FAISS",
    "MRKLChain",
    "VectorDBQA",
    "ElasticVectorSearch",
]
langchain/chains/__init__.py

@@ -7,6 +7,7 @@ from langchain.chains.react.base import ReActChain
from langchain.chains.self_ask_with_search.base import SelfAskWithSearchChain
from langchain.chains.serpapi import SerpAPIChain
from langchain.chains.sql_database.base import SQLDatabaseChain
from langchain.chains.vector_db_qa.base import VectorDBQA

__all__ = [
    "LLMChain",
@@ -17,4 +18,5 @@ __all__ = [
    "ReActChain",
    "SQLDatabaseChain",
    "MRKLChain",
    "VectorDBQA",
]
langchain/chains/vector_db_qa/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
"""Chain for question-answering against a vector database."""
langchain/chains/vector_db_qa/base.py (new file, 80 lines)

@@ -0,0 +1,80 @@
"""Chain for question-answering against a vector database."""
from typing import Dict, List

from pydantic import BaseModel, Extra

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.chains.vector_db_qa.prompt import prompt
from langchain.llms.base import LLM
from langchain.vectorstores.base import VectorStore


class VectorDBQA(Chain, BaseModel):
    """Chain for question-answering against a vector database.

    Example:
        .. code-block:: python

            from langchain import OpenAI, VectorDBQA
            from langchain.vectorstores.faiss import FAISS
            vectordb = FAISS(...)
            vectordbqa = VectorDBQA(llm=OpenAI(), vectorstore=vectordb)

    """

    llm: LLM
    """LLM wrapper to use."""
    vectorstore: VectorStore
    """Vector Database to connect to."""
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Return the singular input key.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the singular output key.

        :meta private:
        """
        return [self.output_key]

    def _run(self, inputs: Dict[str, str]) -> Dict[str, str]:
        question = inputs[self.input_key]
        llm_chain = LLMChain(llm=self.llm, prompt=prompt)
        docs = self.vectorstore.similarity_search(question)
        contexts = []
        for j, doc in enumerate(docs):
            contexts.append(f"Context {j}:\n{doc.page_content}")
        # TODO: handle cases where this context is too long.
        answer = llm_chain.predict(question=question, context="\n\n".join(contexts))
        return {self.output_key: answer}

    def run(self, question: str) -> str:
        """Run question-answering on a vector database.

        Args:
            question: Question to get the answer for.

        Returns:
            The final answer.

        Example:
            .. code-block:: python

                answer = vectordbqa.run("What is the capital of Idaho?")
        """
        return self({self.input_key: question})[self.output_key]
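The core of _run is the context-assembly step: each retrieved document is labeled "Context j" and the pieces are joined with blank lines before being substituted into the prompt. A minimal sketch of that step in isolation (the chunk strings here are hypothetical stand-ins for doc.page_content):

# Mirror of the loop in VectorDBQA._run, run on plain strings.
chunks = [
    "Tonight, I call on the Senate to pass the Freedom to Vote Act.",  # hypothetical chunk
    "Justice Breyer, thank you for your service.",  # hypothetical chunk
]
contexts = [f"Context {j}:\n{chunk}" for j, chunk in enumerate(chunks)]
combined = "\n\n".join(contexts)
# combined is the string passed as {context} to the QA prompt:
# Context 0:
# Tonight, I call on the Senate to pass the Freedom to Vote Act.
#
# Context 1:
# Justice Breyer, thank you for your service.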
langchain/chains/vector_db_qa/prompt.py (new file, 10 lines)

@@ -0,0 +1,10 @@
# flake8: noqa
from langchain.prompts import Prompt

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""
prompt = Prompt(template=prompt_template, input_variables=["context", "question"])
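To see what the chain ultimately sends to the LLM, the template can be filled in directly. A sketch, assuming Prompt exposes a format(**kwargs) method and using a hypothetical retrieved chunk:

from langchain.chains.vector_db_qa.prompt import prompt

# Substitute a retrieved context and a question into the QA template.
filled = prompt.format(
    context="Context 0:\nJustice Breyer, thank you for your service.",  # hypothetical
    question="Who did the president thank?",
)
print(filled)  # ends with "Helpful Answer:", cueing the LLM to answer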