Harrison/0083 (#996)

Co-authored-by: Harrison Chase <harrisonchase@Harrisons-MBP.attlocal.net>
2025-09-05 21:12:48 +00:00 · 2023-02-11 08:29:28 -08:00
parent b7747017d7
commit e51fad1488
4 changed files with 86 additions and 9 deletions
--- a/docs/modules/chains/combine_docs_examples/chat_vector_db.ipynb
+++ b/docs/modules/chains/combine_docs_examples/chat_vector_db.ipynb
@@ -21,28 +21,83 @@
    "from langchain.vectorstores.faiss import FAISS\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain.llms import OpenAI\n",
-    "from langchain.chains import ChatVectorDBChain"
+    "from langchain.chains import ChatVectorDBChain\n",
+    "from langchain.document_loaders import TextLoader"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cdff94be",
+   "metadata": {},
+   "source": [
+    "Load in documents. You can replace this with a loader for whatever type of data you want."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
+   "id": "01c46e92",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loader = TextLoader('../../state_of_the_union.txt')\n",
+    "documents = loader.load()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e9be4779",
+   "metadata": {},
+   "source": [
+    "If you have multiple loaders that you want to combine, you can do something like:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "433363a5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# loaders = [....]\n",
+    "# docs = []\n",
+    "# for loader in loaders:\n",
+    "#     docs.extend(loader.load())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "239475d2",
+   "metadata": {},
+   "source": [
+    "We now split the documents, create embeddings for them, and put them in a vectorstore. This allows us to do semantic search over them."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
   "id": "a8930cf7",
   "metadata": {},
   "outputs": [],
   "source": [
-    "with open('../../state_of_the_union.txt') as f:\n",
-    "    state_of_the_union = f.read()\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
-    "texts = text_splitter.split_text(state_of_the_union)\n",
+    "documents = text_splitter.split_documents(documents)\n",
    "\n",
    "embeddings = OpenAIEmbeddings()\n",
-    "vectorstore = FAISS.from_texts(texts, embeddings)"
+    "vectorstore = FAISS.from_documents(documents, embeddings)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3c96b118",
+   "metadata": {},
+   "source": [
+    "We now initialize the ChatVectorDBChain."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
   "id": "7b4110f3",
   "metadata": {},
   "outputs": [],
@@ -157,7 +212,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.9.1"
  }
 },
 "nbformat": 4,