Added ipynb documentation

This commit is contained in:
dhirup 2025-07-25 13:29:33 -07:00
parent fd0e46691f
commit d23ee15f8d

View File

@ -0,0 +1,276 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c92e2cb6-d75b-4321-bf73-69b07d244aa7",
"metadata": {},
"source": [
"# Coherence\n",
"\n",
"This notebook covers how to get started with the `Coherence` vector store.\n",
"\n",
">[Coherence](https://www.oracle.com/java/coherence/) is an in-memory data grid that provides a distributed, fault-tolerant, and scalable platform for managing and accessing data. It is primarily used for high-performance, mission-critical enterprise applications that require low-latency access to large datasets. In addition to the commercially available product, Oracle also offers [Coherence CE (Community Edition)](https://github.com/oracle/coherence)\n"
]
},
{
"cell_type": "markdown",
"id": "77db1e62-622e-46c1-8f1d-4f32a42f22da",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"To access `Coherence` vector stores you'll need to install the `langchain-coherence` integration package.\n",
"\n",
"```\n",
"pip install langchain_coherence\n",
"```\n"
]
},
{
"cell_type": "markdown",
"id": "a266cffc-0407-4fd3-b905-f42b0770125c",
"metadata": {},
"source": [
"## Usage\n",
"\n",
"Before using LangChain's CoherenceVectorStore you must ensure that a Coherence server ([Coherence CE](https://github.com/oracle/coherence) 25.03+ or [Oracle Coherence](https://www.oracle.com/java/coherence/) 14.1.2+) is running \n",
"\n",
"For local development, we recommend using the Coherence CE container image:\n",
"```aiignore\n",
"docker run -d -p 1408:1408 ghcr.io/oracle/coherence-ce:25.03.2\n",
"```\n",
"\n",
"### Add Documents and retrieve them:"
]
},
{
"cell_type": "raw",
"id": "1e432d99-4750-48b6-a5f9-e8680f9832e9",
"metadata": {},
"source": [
"from langchain_core.documents import Document\n",
"from langchain_core.embeddings import Embeddings\n",
"from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
"\n",
"from coherence import NamedMap, Session\n",
"from langchain_core.vectorstores.coherence_store import CoherenceVectorStore\n",
"\n",
"session: Session = await Session.create()\n",
"try:\n",
" named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
" embedding :Embeddings = HuggingFaceEmbeddings(\n",
" model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
" # this embedding generates vectors of dimension 384\n",
" cvs :CoherenceVectorStore = await CoherenceVectorStore.create(\n",
" named_map,embedding,384)\n",
" d1 :Document = Document(id=\"1\", page_content=\"apple\")\n",
" d2 :Document = Document(id=\"2\", page_content=\"orange\")\n",
" documents = [d1, d2]\n",
" await cvs.aadd_documents(documents)\n",
"\n",
" ids = [doc.id for doc in documents]\n",
" l = await cvs.aget_by_ids(ids)\n",
" assert len(l) == len(ids)\n",
" print(\"====\")\n",
" for e in l:\n",
" print(e)\n",
"finally:\n",
" await session.close()"
]
},
{
"cell_type": "markdown",
"id": "eee71605-8d1f-431d-b796-07274b036da3",
"metadata": {},
"source": [
"### Delete Documents:"
]
},
{
"cell_type": "markdown",
"id": "97e0d922-7208-46be-bade-71f56ce8c586",
"metadata": {},
"source": [
"from langchain_core.documents import Document\n",
"from langchain_core.embeddings import Embeddings\n",
"from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
"\n",
"from coherence import NamedMap, Session\n",
"from langchain_core.vectorstores.coherence_store import CoherenceVectorStore\n",
"\n",
"session: Session = await Session.create()\n",
"try:\n",
" named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
" embedding :Embeddings = HuggingFaceEmbeddings(\n",
" model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
" # this embedding generates vectors of dimension 384\n",
" cvs :CoherenceVectorStore = await CoherenceVectorStore.create(\n",
" named_map,embedding,384)\n",
" d1 :Document = Document(id=\"1\", page_content=\"apple\")\n",
" d2 :Document = Document(id=\"2\", page_content=\"orange\")\n",
" documents = [d1, d2]\n",
" await cvs.aadd_documents(documents)\n",
"\n",
" ids = [doc.id for doc in documents]\n",
" await cvs.adelete(ids)\n",
"finally:\n",
" await session.close()"
]
},
{
"cell_type": "markdown",
"id": "a8c6ad91-ee54-486a-888f-cbfc89be75fd",
"metadata": {},
"source": [
"### Similarity Search:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffe24b05-7bf1-4eaa-a030-6ac3a0446f29",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.documents import Document\n",
"from langchain_core.embeddings import Embeddings\n",
"from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
"\n",
"from coherence import NamedMap, Session\n",
"from langchain_core.vectorstores.coherence_store import CoherenceVectorStore\n",
"\n",
"def test_data():\n",
" d1 :Document = Document(id=\"1\", page_content=\"apple\")\n",
" d2 :Document = Document(id=\"2\", page_content=\"orange\")\n",
" d3 :Document = Document(id=\"3\", page_content=\"tiger\")\n",
" d4 :Document = Document(id=\"4\", page_content=\"cat\")\n",
" d5 :Document = Document(id=\"5\", page_content=\"dog\")\n",
" d6 :Document = Document(id=\"6\", page_content=\"fox\")\n",
" d7 :Document = Document(id=\"7\", page_content=\"pear\")\n",
" d8 :Document = Document(id=\"8\", page_content=\"banana\")\n",
" d9 :Document = Document(id=\"9\", page_content=\"plum\")\n",
" d10 :Document = Document(id=\"10\", page_content=\"lion\")\n",
"\n",
" documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]\n",
" return documents\n",
"\n",
"async def test_asimilarity_search():\n",
" documents = test_data()\n",
" session: Session = await Session.create()\n",
" try:\n",
" named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
" embedding :Embeddings = HuggingFaceEmbeddings(\n",
" model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
" # this embedding generates vectors of dimension 384\n",
" cvs :CoherenceVectorStore = await CoherenceVectorStore.create(\n",
" named_map,embedding,384)\n",
" await cvs.aadd_documents(documents)\n",
" ids = [doc.id for doc in documents]\n",
" l = await cvs.aget_by_ids(ids)\n",
" assert len(l) == 10\n",
"\n",
" result = await cvs.asimilarity_search(\"fruit\")\n",
" assert len(result) == 4\n",
" print(\"====\")\n",
" for e in result:\n",
" print(e)\n",
" finally:\n",
" await session.close()"
]
},
{
"cell_type": "markdown",
"id": "5411477e-5905-4a96-80d8-b8d2238c4bc4",
"metadata": {},
"source": [
"### Similarity Search by vector :"
]
},
{
"cell_type": "raw",
"id": "9713d8bc-db9d-46e7-9fec-2613c2635ad7",
"metadata": {},
"source": [
"from langchain_core.documents import Document\n",
"from langchain_core.embeddings import Embeddings\n",
"from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
"\n",
"from coherence import NamedMap, Session\n",
"from langchain_core.vectorstores.coherence_store import CoherenceVectorStore\n",
"\n",
"def test_data():\n",
" d1 :Document = Document(id=\"1\", page_content=\"apple\")\n",
" d2 :Document = Document(id=\"2\", page_content=\"orange\")\n",
" d3 :Document = Document(id=\"3\", page_content=\"tiger\")\n",
" d4 :Document = Document(id=\"4\", page_content=\"cat\")\n",
" d5 :Document = Document(id=\"5\", page_content=\"dog\")\n",
" d6 :Document = Document(id=\"6\", page_content=\"fox\")\n",
" d7 :Document = Document(id=\"7\", page_content=\"pear\")\n",
" d8 :Document = Document(id=\"8\", page_content=\"banana\")\n",
" d9 :Document = Document(id=\"9\", page_content=\"plum\")\n",
" d10 :Document = Document(id=\"10\", page_content=\"lion\")\n",
"\n",
" documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]\n",
" return documents\n",
"\n",
"async def test_asimilarity_search_by_vector():\n",
" documents = test_data()\n",
" session: Session = await Session.create()\n",
" try:\n",
" named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
" embedding :Embeddings = HuggingFaceEmbeddings(\n",
" model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
" # this embedding generates vectors of dimension 384\n",
" cvs :CoherenceVectorStore = await CoherenceVectorStore.create(\n",
" named_map,embedding,384)\n",
" await cvs.aadd_documents(documents)\n",
" ids = [doc.id for doc in documents]\n",
" l = await cvs.aget_by_ids(ids)\n",
" assert len(l) == 10\n",
"\n",
" vector = cvs.embeddings.embed_query(\"fruit\")\n",
" result = await cvs.asimilarity_search_by_vector(vector)\n",
" assert len(result) == 4\n",
" print(\"====\")\n",
" for e in result:\n",
" print(e)\n",
" finally:\n",
" await session.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "baee76cd-4a9a-4a4e-8a88-f54d9dd98790",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"main_language": "python",
"notebook_metadata_filter": "-all"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.21"
}
},
"nbformat": 4,
"nbformat_minor": 5
}