docs: updated pinecone example notebook (#30993)

- **Description:** Update Pinecone notebook example
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Twitter handle:** N/A


- [x] **Add tests and docs**: Just notebook updates


If no one reviews your PR within a few days, please @-mention one of
baskaryan, eyurtsev, ccurme, vbarda, hwchase17.
Commit b8d0403671 (parent 1204fb8010)
Author: Simonas Jakubonis, 2025-05-03 23:02:21 +03:00, committed by GitHub

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"pip install -qU langchain-pinecone pinecone-notebooks"
+"pip install -qU langchain langchain-pinecone langchain-openai"
 ]
 },
 {
@@ -49,7 +49,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 4,
+"execution_count": null,
 "id": "eb554814",
 "metadata": {},
 "outputs": [],
@@ -57,7 +57,7 @@
 "import getpass\n",
 "import os\n",
 "\n",
-"from pinecone import Pinecone, ServerlessSpec\n",
+"from pinecone import Pinecone\n",
 "\n",
 "if not os.getenv(\"PINECONE_API_KEY\"):\n",
 "    os.environ[\"PINECONE_API_KEY\"] = getpass.getpass(\"Enter your Pinecone API key: \")\n",
@@ -98,59 +98,41 @@
 },
 {
 "cell_type": "code",
-"execution_count": 12,
+"execution_count": 4,
 "id": "276a06dd",
 "metadata": {},
 "outputs": [],
 "source": [
-"import time\n",
+"from pinecone import ServerlessSpec\n",
 "\n",
 "index_name = \"langchain-test-index\"  # change if desired\n",
 "\n",
-"existing_indexes = [index_info[\"name\"] for index_info in pc.list_indexes()]\n",
-"\n",
-"if index_name not in existing_indexes:\n",
+"if not pc.has_index(index_name):\n",
 "    pc.create_index(\n",
 "        name=index_name,\n",
-"        dimension=3072,\n",
+"        dimension=1536,\n",
 "        metric=\"cosine\",\n",
 "        spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
 "    )\n",
-"    while not pc.describe_index(index_name).status[\"ready\"]:\n",
-"        time.sleep(1)\n",
 "\n",
 "index = pc.Index(index_name)"
 ]
 },
-{
-"cell_type": "markdown",
-"id": "3a4d377f",
-"metadata": {},
-"source": [
-"Now that our Pinecone index is setup, we can initialize our vector store. \n",
-"\n",
-"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
-"\n",
-"<EmbeddingTabs/>\n"
-]
-},
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 5,
 "id": "1485db56",
 "metadata": {},
 "outputs": [],
 "source": [
-"# | output: false\n",
-"# | echo: false\n",
 "from langchain_openai import OpenAIEmbeddings\n",
 "\n",
-"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
+"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": 6,
 "id": "6e104aee",
 "metadata": {},
 "outputs": [],
@@ -176,30 +158,10 @@
 },
 {
 "cell_type": "code",
-"execution_count": 15,
+"execution_count": null,
 "id": "70e688f4",
 "metadata": {},
-"outputs": [
-{
-"data": {
-"text/plain": [
-"['167b8681-5974-467f-adcb-6e987a18df01',\n",
-" 'd16010fd-41f8-4d49-9c22-c66d5555a3fe',\n",
-" 'ffcacfb3-2bc2-44c3-a039-c2256a905c0e',\n",
-" 'cf3bfc9f-5dc7-4f5e-bb41-edb957394126',\n",
-" 'e99b07eb-fdff-4cb9-baa8-619fd8efeed3',\n",
-" '68c93033-a24f-40bd-8492-92fa26b631a4',\n",
-" 'b27a4ecb-b505-4c5d-89ff-526e3d103558',\n",
-" '4868a9e6-e6fb-4079-b400-4a1dfbf0d4c4',\n",
-" '921c0e9c-0550-4eb5-9a6c-ed44410788b2',\n",
-" 'c446fc23-64e8-47e7-8c19-ecf985e9411e']"
-]
-},
-"execution_count": 15,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
+"outputs": [],
 "source": [
 "from uuid import uuid4\n",
 "\n",
@@ -268,7 +230,6 @@
 "    document_10,\n",
 "]\n",
 "uuids = [str(uuid4()) for _ in range(len(documents))]\n",
-"\n",
 "vector_store.add_documents(documents=documents, ids=uuids)"
 ]
 },
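The `documents` list and `document_10` above are defined in earlier, unchanged lines of the same cell; a compressed sketch of the pattern, with two hypothetical sample documents whose texts come from outputs removed elsewhere in this commit:

```python
from uuid import uuid4

from langchain_core.documents import Document

# Hypothetical two-document stand-in for the notebook's document_1 .. document_10.
documents = [
    Document(
        page_content="Building an exciting new project with LangChain - come check it out!",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="Robbers broke into the city bank and stole $1 million in cash.",
        metadata={"source": "news"},
    ),
]

uuids = [str(uuid4()) for _ in range(len(documents))]
vector_store.add_documents(documents=documents, ids=uuids)
```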
@@ -282,7 +243,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 16,
+"execution_count": 8,
 "id": "5b8437cd",
 "metadata": {},
 "outputs": [],
@@ -306,19 +267,10 @@
 },
 {
 "cell_type": "code",
-"execution_count": 17,
+"execution_count": 9,
 "id": "ffbcb3fb",
 "metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
-"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
-]
-}
-],
+"outputs": [],
 "source": [
 "results = vector_store.similarity_search(\n",
 "    \"LangChain provides abstractions to make working with LLMs easy\",\n",
@@ -341,18 +293,10 @@
 },
 {
 "cell_type": "code",
-"execution_count": 18,
+"execution_count": null,
 "id": "5fb24583",
 "metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"* [SIM=0.553187] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
-]
-}
-],
+"outputs": [],
 "source": [
 "results = vector_store.similarity_search_with_score(\n",
 "    \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
@@ -377,25 +321,14 @@
 },
 {
 "cell_type": "code",
-"execution_count": 19,
+"execution_count": null,
 "id": "78140e87",
 "metadata": {},
-"outputs": [
-{
-"data": {
-"text/plain": [
-"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
-]
-},
-"execution_count": 19,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
+"outputs": [],
 "source": [
 "retriever = vector_store.as_retriever(\n",
 "    search_type=\"similarity_score_threshold\",\n",
-"    search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n",
+"    search_kwargs={\"k\": 1, \"score_threshold\": 0.4},\n",
 ")\n",
 "retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
 ]
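The only functional change in this hunk is the score threshold dropping from 0.5 to 0.4; as a usage sketch, the retriever cell now reads:

```python
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.4},  # lowered from 0.5 in this commit
)
retriever.invoke("Stealing from the bank is a crime", filter={"source": "news"})
```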
@@ -427,7 +360,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3 (ipykernel)",
+"display_name": ".venv",
 "language": "python",
 "name": "python3"
 },
@@ -441,7 +374,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.9"
+"version": "3.10.15"
 }
 },
 "nbformat": 4,