docs: updated pinecone example notebook (#30993)

- **Description:** Update Pinecone notebook example
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Twitter handle:** N/A


- [x] **Add tests and docs**: Notebook updates only; no new tests required.


If no one reviews your PR within a few days, please @-mention one of
baskaryan, eyurtsev, ccurme, vbarda, hwchase17.
Simonas Jakubonis 2025-05-03 23:02:21 +03:00 committed by GitHub
parent 1204fb8010
commit b8d0403671


@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
"pip install -qU langchain-pinecone pinecone-notebooks"
"pip install -qU langchain langchain-pinecone langchain-openai"
]
},
{
@@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "eb554814",
"metadata": {},
"outputs": [],
@@ -57,7 +57,7 @@
"import getpass\n",
"import os\n",
"\n",
"from pinecone import Pinecone, ServerlessSpec\n",
"from pinecone import Pinecone\n",
"\n",
"if not os.getenv(\"PINECONE_API_KEY\"):\n",
" os.environ[\"PINECONE_API_KEY\"] = getpass.getpass(\"Enter your Pinecone API key: \")\n",
@@ -98,59 +98,41 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"id": "276a06dd",
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"from pinecone import ServerlessSpec\n",
"\n",
"index_name = \"langchain-test-index\" # change if desired\n",
"\n",
"existing_indexes = [index_info[\"name\"] for index_info in pc.list_indexes()]\n",
"\n",
"if index_name not in existing_indexes:\n",
"if not pc.has_index(index_name):\n",
" pc.create_index(\n",
" name=index_name,\n",
" dimension=3072,\n",
" dimension=1536,\n",
" metric=\"cosine\",\n",
" spec=ServerlessSpec(cloud=\"aws\", region=\"us-east-1\"),\n",
" )\n",
" while not pc.describe_index(index_name).status[\"ready\"]:\n",
" time.sleep(1)\n",
"\n",
"index = pc.Index(index_name)"
]
},
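
The index dimension drops from 3072 to 1536 here because the notebook switches to `text-embedding-3-small`, whose vectors are 1536-dimensional, and `pc.has_index(index_name)` replaces the manual scan over `pc.list_indexes()`. A small illustrative sketch of the model/dimension pairing (not part of the notebook itself):

```python
# The index dimension must match the embedding model's output size.
# Standard sizes for OpenAI's v3 embedding models:
EMBEDDING_DIMENSIONS = {
    "text-embedding-3-small": 1536,  # used by the notebook after this update
    "text-embedding-3-large": 3072,  # the previous choice
}

model = "text-embedding-3-small"
dimension = EMBEDDING_DIMENSIONS[model]  # 1536, passed to pc.create_index above
```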
{
"cell_type": "markdown",
"id": "3a4d377f",
"metadata": {},
"source": [
"Now that our Pinecone index is setup, we can initialize our vector store. \n",
"\n",
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
"\n",
"<EmbeddingTabs/>\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 5,
"id": "1485db56",
"metadata": {},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 6,
"id": "6e104aee",
"metadata": {},
"outputs": [],
@@ -176,30 +158,10 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "70e688f4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['167b8681-5974-467f-adcb-6e987a18df01',\n",
" 'd16010fd-41f8-4d49-9c22-c66d5555a3fe',\n",
" 'ffcacfb3-2bc2-44c3-a039-c2256a905c0e',\n",
" 'cf3bfc9f-5dc7-4f5e-bb41-edb957394126',\n",
" 'e99b07eb-fdff-4cb9-baa8-619fd8efeed3',\n",
" '68c93033-a24f-40bd-8492-92fa26b631a4',\n",
" 'b27a4ecb-b505-4c5d-89ff-526e3d103558',\n",
" '4868a9e6-e6fb-4079-b400-4a1dfbf0d4c4',\n",
" '921c0e9c-0550-4eb5-9a6c-ed44410788b2',\n",
" 'c446fc23-64e8-47e7-8c19-ecf985e9411e']"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"from uuid import uuid4\n",
"\n",
@@ -268,7 +230,6 @@
" document_10,\n",
"]\n",
"uuids = [str(uuid4()) for _ in range(len(documents))]\n",
"\n",
"vector_store.add_documents(documents=documents, ids=uuids)"
]
},
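
For context, the cell above builds ten `Document` objects and indexes them with explicit ids; the removed output earlier in the diff showed the ids that `add_documents` returns. A condensed sketch of the same pattern, using two of the texts that appear in the notebook's outputs:

```python
from uuid import uuid4

from langchain_core.documents import Document

documents = [
    Document(
        page_content="Building an exciting new project with LangChain - come check it out!",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
        metadata={"source": "news"},
    ),
]

# Supplying explicit ids makes later upserts and deletes deterministic;
# add_documents returns the list of ids it stored.
uuids = [str(uuid4()) for _ in range(len(documents))]
ids = vector_store.add_documents(documents=documents, ids=uuids)
```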
@@ -282,7 +243,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 8,
"id": "5b8437cd",
"metadata": {},
"outputs": [],
@@ -306,19 +267,10 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 9,
"id": "ffbcb3fb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n"
]
}
],
"outputs": [],
"source": [
"results = vector_store.similarity_search(\n",
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
@@ -341,18 +293,10 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "5fb24583",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* [SIM=0.553187] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
]
}
],
"outputs": [],
"source": [
"results = vector_store.similarity_search_with_score(\n",
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
@@ -377,25 +321,14 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"id": "78140e87",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"retriever = vector_store.as_retriever(\n",
" search_type=\"similarity_score_threshold\",\n",
" search_kwargs={\"k\": 1, \"score_threshold\": 0.5},\n",
" search_kwargs={\"k\": 1, \"score_threshold\": 0.4},\n",
")\n",
"retriever.invoke(\"Stealing from the bank is a crime\", filter={\"source\": \"news\"})"
]
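
The score threshold drops from 0.5 to 0.4, presumably so the example still returns a hit with the smaller embedding model; the removed output shows the expected single news document. A sketch of the retriever pattern:

```python
# as_retriever returns a Runnable, so it can be invoked directly or composed into a chain
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    # A hit must clear score_threshold to be returned; k caps how many come back
    search_kwargs={"k": 1, "score_threshold": 0.4},
)
docs = retriever.invoke("Stealing from the bank is a crime", filter={"source": "news"})
# Expected (per the removed output): one Document with metadata {'source': 'news'}
```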
@@ -427,7 +360,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -441,7 +374,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.10.15"
}
},
"nbformat": 4,