diff --git a/docs/docs/integrations/retrievers/bm25.ipynb b/docs/docs/integrations/retrievers/bm25.ipynb
index 5e0b3fa1984..401031db5fb 100644
--- a/docs/docs/integrations/retrievers/bm25.ipynb
+++ b/docs/docs/integrations/retrievers/bm25.ipynb
@@ -15,7 +15,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a801b57c",
+   "id": "eccbbc4a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -24,9 +24,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "393ac030",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-11-13T23:35:51.348359Z",
+     "start_time": "2024-11-13T23:35:49.409254Z"
+    },
     "tags": []
    },
    "outputs": [],
@@ -44,9 +48,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "id": "98b1c017",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-11-13T23:35:53.096938Z",
+     "start_time": "2024-11-13T23:35:52.493243Z"
+    },
     "tags": []
    },
    "outputs": [],
@@ -66,9 +74,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "id": "53af4f00",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-11-13T23:35:54.202737Z",
+     "start_time": "2024-11-13T23:35:54.198431Z"
+    }
+   },
    "outputs": [],
    "source": [
     "from langchain_core.documents import Document\n",
@@ -96,9 +109,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
    "id": "c0455218",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-11-13T23:35:55.643026Z",
+     "start_time": "2024-11-13T23:35:55.595272Z"
+    },
     "tags": []
    },
    "outputs": [],
@@ -108,22 +125,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "id": "7dfa5c29",
    "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-11-13T23:35:56.122327Z",
+     "start_time": "2024-11-13T23:35:56.112647Z"
+    },
     "tags": []
    },
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[Document(page_content='foo', metadata={}),\n",
-       " Document(page_content='foo bar', metadata={}),\n",
-       " Document(page_content='hello', metadata={}),\n",
-       " Document(page_content='world', metadata={})]"
+       "[Document(metadata={}, page_content='foo'),\n",
+       " Document(metadata={}, page_content='foo bar'),\n",
+       " Document(metadata={}, page_content='hello'),\n",
+       " Document(metadata={}, page_content='world')]"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -132,13 +153,68 @@
     "result"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "51043723814c0d68",
+   "metadata": {},
+   "source": [
+    "## Preprocessing Function\n",
+    "Pass a custom preprocessing function to the retriever through the `preprocess_func` argument to improve search results. Tokenizing text at the word level can enhance retrieval, especially when using vector stores like Chroma, Pinecone, or Faiss for chunked documents."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "997aaa8d",
+   "id": "f5fea58b",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "import nltk\n",
+    "\n",
+    "nltk.download(\"punkt_tab\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "566fcc801cda5da4",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-11-14T00:40:58.728953Z",
+     "start_time": "2024-11-14T00:40:58.722140Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Document(metadata={}, page_content='bar'),\n",
+       " Document(metadata={}, page_content='foo bar')]"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from nltk.tokenize import word_tokenize\n",
+    "\n",
+    "retriever = BM25Retriever.from_documents(\n",
+    "    [\n",
+    "        Document(page_content=\"foo\"),\n",
+    "        Document(page_content=\"bar\"),\n",
+    "        Document(page_content=\"world\"),\n",
+    "        Document(page_content=\"hello\"),\n",
+    "        Document(page_content=\"foo bar\"),\n",
+    "    ],\n",
+    "    k=2,\n",
+    "    preprocess_func=word_tokenize,\n",
+    ")\n",
+    "\n",
+    "result = retriever.invoke(\"bar\")\n",
+    "result"
+   ]
   }
  ],
  "metadata": {