vector db qa (#71)

commit 47af2bcee4
parent 4c0b684f79
examples/vector_db_qa.ipynb (new file, 94 lines)

@@ -0,0 +1,94 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "82525493",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
    "from langchain.vectorstores.faiss import FAISS\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain import OpenAI, VectorDBQA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5c7049db",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('state_of_the_union.txt') as f:\n",
    "    state_of_the_union = f.read()\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "texts = text_splitter.split_text(state_of_the_union)\n",
    "\n",
    "embeddings = OpenAIEmbeddings()\n",
    "docsearch = FAISS.from_texts(texts, embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3018f865",
   "metadata": {},
   "outputs": [],
   "source": [
    "qa = VectorDBQA(llm=OpenAI(), vectorstore=docsearch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "032a47f8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\" The president said that Ketanji Brown Jackson is one of our nation's top legal minds, who will continue Justice Breyer’s legacy of excellence.\""
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "qa.run(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0f20b92",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
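The notebook above exercises the new chain end to end. For reference, the same flow as a plain script (a minimal sketch: it assumes state_of_the_union.txt is on disk and an OpenAI API key is configured in the environment):

# Build a FAISS index over the speech and query it with the new VectorDBQA chain.
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain import OpenAI, VectorDBQA

with open("state_of_the_union.txt") as f:
    state_of_the_union = f.read()

# Split the document into ~1000-character chunks with no overlap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(state_of_the_union)

# Embed each chunk and index the vectors for similarity search.
docsearch = FAISS.from_texts(texts, OpenAIEmbeddings())

# Wire the vector store and an LLM into the chain, then query it.
qa = VectorDBQA(llm=OpenAI(), vectorstore=docsearch)
print(qa.run("What did the president say about Ketanji Brown Jackson"))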
langchain/__init__.py

@@ -14,6 +14,7 @@ from langchain.chains import (
    SelfAskWithSearchChain,
    SerpAPIChain,
    SQLDatabaseChain,
    VectorDBQA,
)
from langchain.docstore import Wikipedia
from langchain.llms import Cohere, HuggingFaceHub, OpenAI
@@ -39,5 +40,6 @@ __all__ = [
    "SQLDatabaseChain",
    "FAISS",
    "MRKLChain",
    "VectorDBQA",
    "ElasticVectorSearch",
]
langchain/chains/__init__.py

@@ -7,6 +7,7 @@ from langchain.chains.react.base import ReActChain
from langchain.chains.self_ask_with_search.base import SelfAskWithSearchChain
from langchain.chains.serpapi import SerpAPIChain
from langchain.chains.sql_database.base import SQLDatabaseChain
from langchain.chains.vector_db_qa.base import VectorDBQA

__all__ = [
    "LLMChain",
@@ -17,4 +18,5 @@ __all__ = [
    "ReActChain",
    "SQLDatabaseChain",
    "MRKLChain",
    "VectorDBQA",
]
langchain/chains/vector_db_qa/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
"""Chain for question-answering against a vector database."""
langchain/chains/vector_db_qa/base.py (new file, 80 lines)

@@ -0,0 +1,80 @@
"""Chain for question-answering against a vector database."""
from typing import Dict, List

from pydantic import BaseModel, Extra

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.chains.vector_db_qa.prompt import prompt
from langchain.llms.base import LLM
from langchain.vectorstores.base import VectorStore


class VectorDBQA(Chain, BaseModel):
    """Chain for question-answering against a vector database.

    Example:
        .. code-block:: python

            from langchain import OpenAI, VectorDBQA
            from langchain.vectorstores.faiss import FAISS
            vectordb = FAISS(...)
            vectordbqa = VectorDBQA(llm=OpenAI(), vectorstore=vectordb)

    """

    llm: LLM
    """LLM wrapper to use."""
    vectorstore: VectorStore
    """Vector Database to connect to."""
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Return the singular input key.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the singular output key.

        :meta private:
        """
        return [self.output_key]

    def _run(self, inputs: Dict[str, str]) -> Dict[str, str]:
        question = inputs[self.input_key]
        llm_chain = LLMChain(llm=self.llm, prompt=prompt)
        docs = self.vectorstore.similarity_search(question)
        contexts = []
        for j, doc in enumerate(docs):
            contexts.append(f"Context {j}:\n{doc.page_content}")
        # TODO: handle cases where this context is too long.
        answer = llm_chain.predict(question=question, context="\n\n".join(contexts))
        return {self.output_key: answer}

    def run(self, question: str) -> str:
        """Run question-answering on a vector database.

        Args:
            question: Question to get the answer for.

        Returns:
            The final answer.

        Example:
            .. code-block:: python

                answer = vectordbqa.run("What is the capital of Idaho?")
        """
        return self({self.input_key: question})[self.output_key]
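The core of _run is the context-assembly step: each retrieved document is labeled "Context j" and the pieces are joined with blank lines before being substituted into the prompt. A minimal sketch of that step in isolation (the chunk strings here are hypothetical stand-ins for doc.page_content):

# Mirror of the loop in VectorDBQA._run, run on plain strings.
chunks = [
    "Tonight, I call on the Senate to pass the Freedom to Vote Act.",  # hypothetical chunk
    "Justice Breyer, thank you for your service.",  # hypothetical chunk
]
contexts = [f"Context {j}:\n{chunk}" for j, chunk in enumerate(chunks)]
combined = "\n\n".join(contexts)
# combined is the string passed as {context} to the QA prompt:
# Context 0:
# Tonight, I call on the Senate to pass the Freedom to Vote Act.
#
# Context 1:
# Justice Breyer, thank you for your service.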
langchain/chains/vector_db_qa/prompt.py (new file, 10 lines)

@@ -0,0 +1,10 @@
# flake8: noqa
from langchain.prompts import Prompt

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""
prompt = Prompt(template=prompt_template, input_variables=["context", "question"])
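To see what the chain ultimately sends to the LLM, the template can be filled in directly. A sketch, assuming Prompt exposes a format(**kwargs) method and using a hypothetical retrieved chunk:

from langchain.chains.vector_db_qa.prompt import prompt

# Substitute a retrieved context and a question into the QA template.
filled = prompt.format(
    context="Context 0:\nJustice Breyer, thank you for your service.",  # hypothetical
    question="Who did the president thank?",
)
print(filled)  # ends with "Helpful Answer:", cueing the LLM to answer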