docs[minor],community[patch]: Minor tutorial docs improvement, minor import error quick fix. (#22725)

minor changes to module import error handling and minor issues in tutorial documents. --------- Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
2025-06-23 07:09:31 +00:00 · 2024-06-20 14:36:49 -05:00 · 2024-06-20 14:36:49 -05:00 · a349fce880
commit a349fce880
parent 7545b1d29b
5 changed files with 42 additions and 11 deletions
--- a/docs/docs/how_to/document_loader_pdf.ipynb
+++ b/docs/docs/how_to/document_loader_pdf.ipynb
@ -69,6 +69,17 @@
    "Once we have loaded PDFs into LangChain `Document` objects, we can index them (e.g., a RAG application) in the usual way:"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3b932bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install faiss-cpu \n",
+    "# use `pip install faiss-gpu` for CUDA GPU support"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
--- a/docs/docs/integrations/document_loaders/microsoft_powerpoint.ipynb
+++ b/docs/docs/integrations/document_loaders/microsoft_powerpoint.ipynb
@ -12,6 +12,19 @@
    "This covers how to load `Microsoft PowerPoint` documents into a document format that we can use downstream."
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aef1500f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install packages\n",
+    "%pip install unstructured\n",
+    "%pip install python-magic\n",
+    "%pip install python-pptx"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 1,
--- a/docs/docs/tutorials/qa_chat_history.ipynb
+++ b/docs/docs/tutorials/qa_chat_history.ipynb
@ -322,7 +322,7 @@
    "\n",
    "Now we can build our full QA chain. This is as simple as updating the retriever to be our new `history_aware_retriever`.\n",
    "\n",
-    "Again, we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer.\n",
+    "Again, we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer. A more detailed explanation is available [here](/docs/tutorials/rag/#built-in-chains).\n",
    "\n",
    "We build our final `rag_chain` with [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html). This chain applies the `history_aware_retriever` and `question_answer_chain` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output."
   ]
@ -760,13 +760,6 @@
   "id": "931c4fe3-c603-4efb-9b37-5f7cbbb1cbbd",
   "metadata": {},
   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Error in LangChainTracer.on_tool_end callback: TracerException(\"Found chain run at ID 0ec120e2-b1fc-4593-9fee-2dd4f4cae256, but expected {'tool'} run.\")\n"
-     ]
-    },
    {
     "data": {
      "text/plain": [
@ -1030,6 +1023,7 @@
    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "from langgraph.checkpoint.sqlite import SqliteSaver\n",
+    "from langgraph.prebuilt import create_react_agent\n",
    "\n",
    "memory = SqliteSaver.from_conn_string(\":memory:\")\n",
    "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
--- a/docs/scripts/generate_api_reference_links.py
+++ b/docs/scripts/generate_api_reference_links.py
@ -24,7 +24,7 @@ _IMPORT_RE = re.compile(

 _CURRENT_PATH = Path(__file__).parent.absolute()
 # Directory where generated markdown files are stored
-_DOCS_DIR = _CURRENT_PATH / "docs"
+_DOCS_DIR = _CURRENT_PATH.parent.parent / "docs"


 def find_files(path):
@ -75,6 +75,7 @@ def main():

    for file in find_files(args.docs_dir):
        file_imports = replace_imports(file)
+        print(file)

        if file_imports:
            # Use relative file path as key
--- a/libs/community/langchain_community/document_loaders/parsers/pdf.py
+++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py
@ -89,7 +89,13 @@ class PyPDFParser(BaseBlobParser):

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
        """Lazily parse the blob."""
+        try:
            import pypdf
+        except ImportError:
+            raise ImportError(
+                "`pypdf` package not found, please install it with "
+                "`pip install pypdf`"
+            )

        with blob.as_bytes_io() as pdf_file_obj:  # type: ignore[attr-defined]
            pdf_reader = pypdf.PdfReader(pdf_file_obj, password=self.password)
@ -144,7 +150,13 @@ class PDFMinerParser(BaseBlobParser):
        """Lazily parse the blob."""

        if not self.extract_images:
+            try:
                from pdfminer.high_level import extract_text
+            except ImportError:
+                raise ImportError(
+                    "`pdfminer` package not found, please install it with "
+                    "`pip install pdfminer.six`"
+                )

            with blob.as_bytes_io() as pdf_file_obj:  # type: ignore[attr-defined]
                if self.concatenate_pages: