mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-08 20:41:52 +00:00
docs: Pinecone Rerank example notebook (#31147)
Created an example notebook of how to use Pinecone Reranking service cc @jamescalam
This commit is contained in:
parent
66d1ed6099
commit
3cba22d8d7
322
docs/docs/integrations/retrievers/pinecone_rerank.ipynb
Normal file
322
docs/docs/integrations/retrievers/pinecone_rerank.ipynb
Normal file
@ -0,0 +1,322 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7fb27b941602401d91542211134fc71a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Pinecone Rerank\n",
|
||||
"\n",
|
||||
"> This notebook shows how to use **PineconeRerank** for two-stage vector retrieval reranking using Pinecone's hosted reranking API as demonstrated in `langchain_pinecone/libs/pinecone/rerank.py`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "acae54e37e7d407bbb7b55eff062a284",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"Install the `langchain-pinecone` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9a63283cbaf04dbcab1f6479b197f3a8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU \"langchain-pinecone\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8dd0d8092fe74a7c96281538738b07e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Credentials\n",
|
||||
"Set your Pinecone API key to use the reranking API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "72eea5119410473aa328ad9291626812",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"PINECONE_API_KEY\"] = os.getenv(\"PINECONE_API_KEY\") or getpass(\n",
|
||||
" \"Enter your Pinecone API key: \"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8edb47106e1a46a883d545849b8ab81b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"Use `PineconeRerank` to rerank a list of documents by relevance to a query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "10185d26023b46108eb7d9f57d49d2b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/jakit/customers/aurelio/langchain-pinecone/libs/pinecone/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Score: 0.9998 | Content: Paris is the capital of France.\n",
|
||||
"Score: 0.1950 | Content: The Eiffel Tower is in Paris.\n",
|
||||
"Score: 0.0042 | Content: Berlin is the capital of Germany.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_pinecone import PineconeRerank\n",
|
||||
"\n",
|
||||
"# Initialize reranker\n",
|
||||
"reranker = PineconeRerank(model=\"bge-reranker-v2-m3\")\n",
|
||||
"\n",
|
||||
"# Sample documents\n",
|
||||
"documents = [\n",
|
||||
" Document(page_content=\"Paris is the capital of France.\"),\n",
|
||||
" Document(page_content=\"Berlin is the capital of Germany.\"),\n",
|
||||
" Document(page_content=\"The Eiffel Tower is in Paris.\"),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Rerank documents\n",
|
||||
"query = \"What is the capital of France?\"\n",
|
||||
"reranked_docs = reranker.compress_documents(documents, query)\n",
|
||||
"\n",
|
||||
"# Print results\n",
|
||||
"for doc in reranked_docs:\n",
|
||||
" score = doc.metadata.get(\"relevance_score\")\n",
|
||||
" print(f\"Score: {score:.4f} | Content: {doc.page_content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8763a12b2bbd4a93a75aff182afb95dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"### Reranking with Top-N\n",
|
||||
"Specify `top_n` to limit the number of returned documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7623eae2785240b9bd12b16a66d81610",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Top-1 Result:\n",
|
||||
"Score: 0.9998 | Content: Paris is the capital of France.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Return only top-1 result\n",
|
||||
"reranker_top1 = PineconeRerank(model=\"bge-reranker-v2-m3\", top_n=1)\n",
|
||||
"top1_docs = reranker_top1.compress_documents(documents, query)\n",
|
||||
"print(\"Top-1 Result:\")\n",
|
||||
"for doc in top1_docs:\n",
|
||||
" print(f\"Score: {doc.metadata['relevance_score']:.4f} | Content: {doc.page_content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7cdc8c89c7104fffa095e18ddfef8986",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Reranking with Custom Rank Fields\n",
|
||||
"If your documents are dictionaries or have custom fields, use `rank_fields` to specify the field to rank on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b118ea5561624da68c537baed56e602f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ID: doc3 | Score: 0.9892\n",
|
||||
"ID: doc1 | Score: 0.0006\n",
|
||||
"ID: doc2 | Score: 0.0000\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Sample dictionary documents with 'text' field\n",
|
||||
"docs_dict = [\n",
|
||||
" {\n",
|
||||
" \"id\": \"doc1\",\n",
|
||||
" \"text\": \"Article about renewable energy.\",\n",
|
||||
" \"title\": \"Renewable Energy\",\n",
|
||||
" },\n",
|
||||
" {\"id\": \"doc2\", \"text\": \"Report on economic growth.\", \"title\": \"Economic Growth\"},\n",
|
||||
" {\n",
|
||||
" \"id\": \"doc3\",\n",
|
||||
" \"text\": \"News on climate policy changes.\",\n",
|
||||
" \"title\": \"Climate Policy\",\n",
|
||||
" },\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Initialize reranker with rank_fields\n",
|
||||
"reranker_text = PineconeRerank(model=\"bge-reranker-v2-m3\", rank_fields=[\"text\"])\n",
|
||||
"climate_docs = reranker_text.rerank(docs_dict, \"Latest news on climate change.\")\n",
|
||||
"\n",
|
||||
"# Show IDs and scores\n",
|
||||
"for res in climate_docs:\n",
|
||||
" print(f\"ID: {res['id']} | Score: {res['score']:.4f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a80bb6c3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can rerank based on title field"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a6f2768e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ID: doc2 | Score: 0.8918 | Title: Economic Growth\n",
|
||||
"ID: doc3 | Score: 0.0002 | Title: Climate Policy\n",
|
||||
"ID: doc1 | Score: 0.0000 | Title: Renewable Energy\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"economic_docs = reranker_text.rerank(docs_dict, \"Economic forecast.\")\n",
|
||||
"\n",
|
||||
"# Show IDs and scores\n",
|
||||
"for res in economic_docs:\n",
|
||||
" print(\n",
|
||||
" f\"ID: {res['id']} | Score: {res['score']:.4f} | Title: {res['document']['title']}\"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "938c804e27f84196a10c8828c723f798",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Reranking with Additional Parameters\n",
|
||||
"You can pass model-specific parameters (e.g., `truncate`) directly to `.rerank()`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a94c501c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"How to handle inputs longer than those supported by the model. Accepted values: END or NONE.\n",
|
||||
"END truncates the input sequence at the input token limit. NONE returns an error when the input exceeds the input token limit."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "504fb2a444614c0babb325280ed9130a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ID: docA | Score: 0.6950\n",
|
||||
"ID: docB | Score: 0.0001\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Rerank with custom truncate parameter\n",
|
||||
"docs_simple = [\n",
|
||||
" {\"id\": \"docA\", \"text\": \"Quantum entanglement is a physical phenomenon...\"},\n",
|
||||
" {\"id\": \"docB\", \"text\": \"Classical mechanics describes motion...\"},\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"reranked = reranker.rerank(\n",
|
||||
" documents=docs_simple,\n",
|
||||
" query=\"Explain the concept of quantum entanglement.\",\n",
|
||||
" truncate=\"END\",\n",
|
||||
")\n",
|
||||
"# Print reranked IDs and scores\n",
|
||||
"for res in reranked:\n",
|
||||
" print(f\"ID: {res['id']} | Score: {res['score']:.4f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab78bcd8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within a chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "59bbdb311c014d738909a11f9e486628",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"- `PineconeRerank(model, top_n, rank_fields, return_documents)`\n",
|
||||
"- `.rerank(documents, query, rank_fields=None, model=None, top_n=None, truncate=\"END\")`\n",
|
||||
"- `.compress_documents(documents, query)` (returns `Document` objects with `relevance_score` in metadata)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user