From a349fce8802661ceaabb65d65b48e97b9c325066 Mon Sep 17 00:00:00 2001
From: Zheng Robert Jia <jia00129@umn.edu>
Date: Thu, 20 Jun 2024 14:36:49 -0500
Subject: [PATCH] docs[minor],community[patch]: Minor tutorial docs
 improvement, minor import error quick fix.  (#22725)

Minor changes to module import error handling, plus fixes for minor issues
in the tutorial documents.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
---
 docs/docs/how_to/document_loader_pdf.ipynb       | 11 +++++++++++
 .../document_loaders/microsoft_powerpoint.ipynb  | 13 +++++++++++++
 docs/docs/tutorials/qa_chat_history.ipynb        | 10 ++--------
 docs/scripts/generate_api_reference_links.py     |  3 ++-
 .../document_loaders/parsers/pdf.py              | 16 ++++++++++++++--
 5 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/docs/docs/how_to/document_loader_pdf.ipynb b/docs/docs/how_to/document_loader_pdf.ipynb
index a27f9383cf8..4a5275e9812 100644
--- a/docs/docs/how_to/document_loader_pdf.ipynb
+++ b/docs/docs/how_to/document_loader_pdf.ipynb
@@ -69,6 +69,17 @@
     "Once we have loaded PDFs into LangChain `Document` objects, we can index them (e.g., a RAG application) in the usual way:"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3b932bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install faiss-cpu \n",
+    "# use `pip install faiss-gpu` for CUDA GPU support"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/docs/docs/integrations/document_loaders/microsoft_powerpoint.ipynb b/docs/docs/integrations/document_loaders/microsoft_powerpoint.ipynb
index 670d5c70ee7..7d463b1e259 100644
--- a/docs/docs/integrations/document_loaders/microsoft_powerpoint.ipynb
+++ b/docs/docs/integrations/document_loaders/microsoft_powerpoint.ipynb
@@ -12,6 +12,19 @@
     "This covers how to load `Microsoft PowerPoint` documents into a document format that we can use downstream."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aef1500f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install packages\n",
+    "%pip install unstructured\n",
+    "%pip install python-magic\n",
+    "%pip install python-pptx"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
diff --git a/docs/docs/tutorials/qa_chat_history.ipynb b/docs/docs/tutorials/qa_chat_history.ipynb
index cb032f8d59e..aa1cae52651 100644
--- a/docs/docs/tutorials/qa_chat_history.ipynb
+++ b/docs/docs/tutorials/qa_chat_history.ipynb
@@ -322,7 +322,7 @@
     "\n",
     "Now we can build our full QA chain. This is as simple as updating the retriever to be our new `history_aware_retriever`.\n",
     "\n",
-    "Again, we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer.\n",
+    "Again, we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer. A more detailed explanation is available [here](/docs/tutorials/rag/#built-in-chains).\n",
     "\n",
     "We build our final `rag_chain` with [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html). This chain applies the `history_aware_retriever` and `question_answer_chain` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output."
    ]
@@ -760,13 +760,6 @@
    "id": "931c4fe3-c603-4efb-9b37-5f7cbbb1cbbd",
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Error in LangChainTracer.on_tool_end callback: TracerException(\"Found chain run at ID 0ec120e2-b1fc-4593-9fee-2dd4f4cae256, but expected {'tool'} run.\")\n"
-     ]
-    },
     {
      "data": {
       "text/plain": [
@@ -1030,6 +1023,7 @@
     "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
     "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
     "from langgraph.checkpoint.sqlite import SqliteSaver\n",
+    "from langgraph.prebuilt import create_react_agent\n",
     "\n",
     "memory = SqliteSaver.from_conn_string(\":memory:\")\n",
     "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
diff --git a/docs/scripts/generate_api_reference_links.py b/docs/scripts/generate_api_reference_links.py
index f05cc9abc93..ff3eb53a2c9 100644
--- a/docs/scripts/generate_api_reference_links.py
+++ b/docs/scripts/generate_api_reference_links.py
@@ -24,7 +24,7 @@ _IMPORT_RE = re.compile(
 
 _CURRENT_PATH = Path(__file__).parent.absolute()
 # Directory where generated markdown files are stored
-_DOCS_DIR = _CURRENT_PATH / "docs"
+_DOCS_DIR = _CURRENT_PATH.parent.parent / "docs"
 
 
 def find_files(path):
@@ -75,6 +75,7 @@ def main():
 
     for file in find_files(args.docs_dir):
         file_imports = replace_imports(file)
+        print(file)
 
         if file_imports:
             # Use relative file path as key
diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py
index 32b0a0d1335..76cf101791d 100644
--- a/libs/community/langchain_community/document_loaders/parsers/pdf.py
+++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py
@@ -89,7 +89,13 @@ class PyPDFParser(BaseBlobParser):
 
     def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
         """Lazily parse the blob."""
-        import pypdf
+        try:
+            import pypdf
+        except ImportError:
+            raise ImportError(
+                "`pypdf` package not found, please install it with "
+                "`pip install pypdf`"
+            )
 
         with blob.as_bytes_io() as pdf_file_obj:  # type: ignore[attr-defined]
             pdf_reader = pypdf.PdfReader(pdf_file_obj, password=self.password)
@@ -144,7 +150,13 @@ class PDFMinerParser(BaseBlobParser):
         """Lazily parse the blob."""
 
         if not self.extract_images:
-            from pdfminer.high_level import extract_text
+            try:
+                from pdfminer.high_level import extract_text
+            except ImportError:
+                raise ImportError(
+                    "`pdfminer` package not found, please install it with "
+                    "`pip install pdfminer.six`"
+                )
 
             with blob.as_bytes_io() as pdf_file_obj:  # type: ignore[attr-defined]
                 if self.concatenate_pages: