diff --git a/docs/docs/integrations/document_transformers/rankllm-reranker.ipynb b/docs/docs/integrations/document_transformers/rankllm-reranker.ipynb
index 6875e86fe89..9e87053987c 100644
--- a/docs/docs/integrations/document_transformers/rankllm-reranker.ipynb
+++ b/docs/docs/integrations/document_transformers/rankllm-reranker.ipynb
@@ -11,39 +11,41 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "[RankLLM](https://github.com/castorini/rank_llm) offers a suite of listwise rerankers, albeit with focus on open source LLMs finetuned for the task - RankVicuna and RankZephyr being two of them."
+    "**[RankLLM](https://github.com/castorini/rank_llm)** is a **flexible reranking framework** supporting **listwise, pairwise, and pointwise ranking models**. It includes **RankVicuna, RankZephyr, MonoT5, DuoT5, LiT5, and FirstMistral**, with integrations for **FastChat, vLLM, SGLang, and TensorRT-LLM** for efficient inference. RankLLM is optimized for **retrieval and ranking tasks**, leveraging both **open-source LLMs** and proprietary rerankers like **RankGPT and RankGemini**. It supports **batched inference, first-token reranking, and retrieval via BM25 and SPLADE**.\n",
+    "\n",
+    "> **Note:** If using the built-in retriever, RankLLM requires **Pyserini, JDK 21, PyTorch, and Faiss** for retrieval functionality."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install --upgrade --quiet  rank_llm"
+    "%pip install --upgrade --quiet rank_llm"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install --upgrade --quiet  langchain_openai"
+    "%pip install --upgrade --quiet langchain_openai"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install --upgrade --quiet  faiss-cpu"
+    "%pip install --upgrade --quiet faiss-cpu"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -56,7 +58,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -64,7 +66,7 @@
    "def pretty_print_docs(docs):\n",
    "    print(\n",
    "        f\"\\n{'-' * 100}\\n\".join(\n",
-    "            [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n",
+    "            [f\"Document {i + 1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n",
    "        )\n",
    "    )"
    ]
@@ -79,9 +81,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2025-02-22 15:28:58,344 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    }
+   ],
    "source": [
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_community.vectorstores import FAISS\n",
@@ -114,14 +124,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2025-02-17 04:37:08,458 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+      "2025-02-22 15:29:00,892 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
      ]
     },
     {
@@ -331,27 +341,41 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Retrieval + Reranking with RankZephyr"
+    "RankZephyr performs listwise reranking for improved retrieval quality, but requires at least 24GB of VRAM to run efficiently."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading shards: 100%|██████████| 3/3 [00:00<00:00, 2674.37it/s]\n",
+      "Loading checkpoint shards: 100%|██████████| 3/3 [01:49<00:00, 36.39s/it]\n"
+     ]
+    }
+   ],
    "source": [
+    "import torch\n",
    "from langchain.retrievers.contextual_compression import ContextualCompressionRetriever\n",
    "from langchain_community.document_compressors.rankllm_rerank import RankLLMRerank\n",
    "\n",
-    "compressor = RankLLMRerank(top_n=3, model=\"zephyr\")\n",
+    "torch.cuda.empty_cache()\n",
+    "\n",
+    "compressor = RankLLMRerank(top_n=3, model=\"rank_zephyr\")\n",
    "compression_retriever = ContextualCompressionRetriever(\n",
    "    base_compressor=compressor, base_retriever=retriever\n",
-    ")"
+    ")\n",
+    "\n",
+    "del compressor"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -386,7 +410,7 @@
     ]
    },
    {
-    "name": "stderr",
+    "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
@@ -407,7 +431,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -432,7 +456,7 @@
    "    llm=ChatOpenAI(temperature=0), retriever=compression_retriever\n",
    ")\n",
    "\n",
-    "chain({\"query\": query})"
+    "chain.invoke({\"query\": query})"
    ]
   },
   {
@@ -451,9 +475,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2025-02-22 15:01:29,469 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -683,7 +714,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -698,9 +729,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2025-02-22 15:01:38,554 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+      " 0%|          | 0/1 [00:00