From 79d8556c22956d4ce6776e5b9690e216e4fbb738 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Wed, 26 Jun 2024 14:47:01 -0700 Subject: [PATCH] docs[patch]: Address feedback from docs users (#23550) - Updates chat few shot prompt tutorial to show off a more cohesive example - Fix async Chromium loader guide - Fix Excel loader install instructions - Reformat Html2Text page - Add install instructions to Azure OpenAI embeddings page - Add missing dep install to SQL QA tutorial @baskaryan --- docs/docs/how_to/few_shot_examples_chat.ipynb | 123 +++++++++----- .../document_loaders/async_chromium.ipynb | 74 ++++---- .../document_loaders/microsoft_excel.ipynb | 33 +++- .../document_transformers/html2text.ipynb | 160 +++++++++++------- .../text_embedding/azureopenai.ipynb | 21 ++- docs/docs/tutorials/sql_qa.ipynb | 4 +- 6 files changed, 263 insertions(+), 152 deletions(-) diff --git a/docs/docs/how_to/few_shot_examples_chat.ipynb b/docs/docs/how_to/few_shot_examples_chat.ipynb index cee98c04113..0f9f1e321f5 100644 --- a/docs/docs/how_to/few_shot_examples_chat.ipynb +++ b/docs/docs/how_to/few_shot_examples_chat.ipynb @@ -51,7 +51,7 @@ "- `examples`: A list of dictionary examples to include in the final prompt.\n", "- `example_prompt`: converts each example into 1 or more messages through its [`format_messages`](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=format_messages#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n", "\n", - "Below is a simple demonstration. First, define the examples you'd like to include:" + "Below is a simple demonstration. First, define the examples you'd like to include. 
Let's give the LLM an unfamiliar mathematical operator, denoted by the \"🦜\" emoji:" ] }, { @@ -59,17 +59,7 @@ "execution_count": 1, "id": "5b79e400", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mWARNING: You are using pip version 22.0.4; however, version 24.0 is available.\n", - "You should consider upgrading via the '/Users/jacoblee/.pyenv/versions/3.10.5/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -qU langchain langchain-openai langchain-chroma\n", "\n", @@ -79,9 +69,50 @@ "os.environ[\"OPENAI_API_KEY\"] = getpass()" ] }, + { + "cell_type": "markdown", + "id": "30856d92", + "metadata": {}, + "source": [ + "If we try to ask the model what the result of this expression is, it will fail:" + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, + "id": "174dec5b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='The expression \"2 🦜 9\" is not a standard mathematical operation or equation. It appears to be a combination of the number 2 and the parrot emoji 🦜 followed by the number 9. 
It does not have a specific mathematical meaning.', response_metadata={'token_usage': {'completion_tokens': 54, 'prompt_tokens': 17, 'total_tokens': 71}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-aad12dda-5c47-4a1e-9949-6fe94e03242a-0', usage_metadata={'input_tokens': 17, 'output_tokens': 54, 'total_tokens': 71})" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "\n", + "model = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0.0)\n", + "\n", + "model.invoke(\"What is 2 🦜 9?\")" + ] + }, + { + "cell_type": "markdown", + "id": "e6d58385", + "metadata": {}, + "source": [ + "Now let's see what happens if we give the LLM some examples to work with. We'll define some below:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "0fc5a02a-6249-4e92-95c3-30fff9671e8b", "metadata": { "tags": [] @@ -91,8 +122,8 @@ "from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate\n", "\n", "examples = [\n", - " {\"input\": \"2+2\", \"output\": \"4\"},\n", - " {\"input\": \"2+3\", \"output\": \"5\"},\n", + " {\"input\": \"2 🦜 2\", \"output\": \"4\"},\n", + " {\"input\": \"2 🦜 3\", \"output\": \"5\"},\n", "]" ] }, @@ -106,7 +137,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "id": "65e72ad1-9060-47d0-91a1-bc130c8b98ac", "metadata": { "tags": [] @@ -116,7 +147,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[HumanMessage(content='2+2'), AIMessage(content='4'), HumanMessage(content='2+3'), AIMessage(content='5')]\n" + "[HumanMessage(content='2 🦜 2'), AIMessage(content='4'), HumanMessage(content='2 🦜 3'), AIMessage(content='5')]\n" ] } ], @@ -146,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "id": "9f86d6d9-50de-41b6-b6c7-0f9980cc0187", "metadata": { "tags": [] @@ -162,9 +193,17 
@@ ")" ] }, + { + "cell_type": "markdown", + "id": "dd8029c5", + "metadata": {}, + "source": [ + "And now let's ask the model the initial question and see how it does:" + ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "id": "97d443b1-6fae-4b36-bede-3ff7306288a3", "metadata": { "tags": [] @@ -173,10 +212,10 @@ { "data": { "text/plain": [ - "AIMessage(content='A triangle does not have a square. The square of a number is the result of multiplying the number by itself.', response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 52, 'total_tokens': 75}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-3456c4ef-7b4d-4adb-9e02-8079de82a47a-0')" + "AIMessage(content='11', response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 60, 'total_tokens': 61}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5ec4e051-262f-408e-ad00-3f2ebeb561c3-0', usage_metadata={'input_tokens': 60, 'output_tokens': 1, 'total_tokens': 61})" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -184,9 +223,9 @@ "source": [ "from langchain_openai import ChatOpenAI\n", "\n", - "chain = final_prompt | ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0.0)\n", + "chain = final_prompt | model\n", "\n", - "chain.invoke({\"input\": \"What's the square of a triangle?\"})" + "chain.invoke({\"input\": \"What is 2 🦜 9?\"})" ] }, { @@ -194,6 +233,8 @@ "id": "70ab7114-f07f-46be-8874-3705a25aba5f", "metadata": {}, "source": [ + "And we can see that the model has now inferred that the parrot emoji means addition from the given few-shot examples!\n", + "\n", "## Dynamic few-shot prompting\n", "\n", "Sometimes you may want to select only a few examples from your overall set to show based on the input. 
For this, you can replace the `examples` passed into `FewShotChatMessagePromptTemplate` with an `example_selector`. The other components remain the same as above! Our dynamic few-shot prompt template would look like:\n", @@ -208,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "id": "ad66f06a-66fd-4fcc-8166-5d0e3c801e57", "metadata": { "tags": [] @@ -220,9 +261,9 @@ "from langchain_openai import OpenAIEmbeddings\n", "\n", "examples = [\n", - " {\"input\": \"2+2\", \"output\": \"4\"},\n", - " {\"input\": \"2+3\", \"output\": \"5\"},\n", - " {\"input\": \"2+4\", \"output\": \"6\"},\n", + " {\"input\": \"2 🦜 2\", \"output\": \"4\"},\n", + " {\"input\": \"2 🦜 3\", \"output\": \"5\"},\n", + " {\"input\": \"2 🦜 4\", \"output\": \"6\"},\n", " {\"input\": \"What did the cow say to the moon?\", \"output\": \"nothing at all\"},\n", " {\n", " \"input\": \"Write me a poem about the moon\",\n", @@ -247,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "id": "7790303a-f722-452e-8921-b14bdf20bdff", "metadata": { "tags": [] @@ -257,10 +298,10 @@ "data": { "text/plain": [ "[{'input': 'What did the cow say to the moon?', 'output': 'nothing at all'},\n", - " {'input': '2+4', 'output': '6'}]" + " {'input': '2 🦜 4', 'output': '6'}]" ] }, - "execution_count": 7, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -287,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "id": "253c255e-41d7-45f6-9d88-c7a0ced4b1bd", "metadata": { "tags": [] @@ -297,7 +338,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[HumanMessage(content='2+3'), AIMessage(content='5'), HumanMessage(content='2+2'), AIMessage(content='4')]\n" + "[HumanMessage(content='2 🦜 3'), AIMessage(content='5'), HumanMessage(content='2 🦜 4'), AIMessage(content='6')]\n" ] } ], @@ -317,7 +358,7 @@ " ),\n", ")\n", "\n", - "print(few_shot_prompt.invoke(input=\"What's 3+3?\").to_messages())" 
+ "print(few_shot_prompt.invoke(input=\"What's 3 🦜 3?\").to_messages())" ] }, { @@ -330,7 +371,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 12, "id": "e731cb45-f0ea-422c-be37-42af2a6cb2c4", "metadata": { "tags": [] @@ -340,7 +381,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "messages=[HumanMessage(content='2+3'), AIMessage(content='5'), HumanMessage(content='2+2'), AIMessage(content='4')]\n" + "messages=[HumanMessage(content='2 🦜 3'), AIMessage(content='5'), HumanMessage(content='2 🦜 4'), AIMessage(content='6')]\n" ] } ], @@ -353,7 +394,7 @@ " ]\n", ")\n", "\n", - "print(few_shot_prompt.invoke(input=\"What's 3+3?\"))" + "print(few_shot_prompt.invoke(input=\"What's 3 🦜 3?\"))" ] }, { @@ -368,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "id": "0568cbc6-5354-47f1-ab4d-dfcc616cf583", "metadata": { "tags": [] @@ -377,10 +418,10 @@ { "data": { "text/plain": [ - "AIMessage(content='6', response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 51, 'total_tokens': 52}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-6bcbe158-a8e3-4a85-a754-1ba274a9f147-0')" + "AIMessage(content='6', response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 60, 'total_tokens': 61}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d1863e5e-17cd-4e9d-bf7a-b9f118747a65-0', usage_metadata={'input_tokens': 60, 'output_tokens': 1, 'total_tokens': 61})" ] }, - "execution_count": 18, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -388,7 +429,7 @@ "source": [ "chain = final_prompt | ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0.0)\n", "\n", - "chain.invoke({\"input\": \"What's 3+3?\"})" + "chain.invoke({\"input\": \"What's 3 🦜 3?\"})" ] }, { @@ -428,7 +469,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.5" } }, "nbformat": 4, diff --git a/docs/docs/integrations/document_loaders/async_chromium.ipynb b/docs/docs/integrations/document_loaders/async_chromium.ipynb index 88cc2b84ce6..15cf2e32e84 100644 --- a/docs/docs/integrations/document_loaders/async_chromium.ipynb +++ b/docs/docs/integrations/document_loaders/async_chromium.ipynb @@ -13,7 +13,7 @@ "\n", "Headless mode means that the browser is running without a graphical user interface.\n", "\n", - "`AsyncChromiumLoader` loads the page, and then we use `Html2TextTransformer` to transform to text." + "In the below example we'll use the `AsyncChromiumLoader` to load the page, and then the [`Html2TextTransformer`](/docs/integrations/document_transformers/html2text/) to strip out the HTML tags and other semantic information." ] }, { @@ -23,48 +23,22 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install --upgrade --quiet playwright beautifulsoup4\n", + "%pip install --upgrade --quiet playwright beautifulsoup4 html2text\n", "!playwright install" ] }, - { - "cell_type": "code", - "execution_count": 2, - "id": "dd2cdea7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'