From d23ee15f8d0e8bc047fb30949deef1a070a0c314 Mon Sep 17 00:00:00 2001 From: dhirup Date: Fri, 25 Jul 2025 13:29:33 -0700 Subject: [PATCH] Added ipynb documentation --- .../integrations/vectorstores/coherence.ipynb | 276 ++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 docs/docs/integrations/vectorstores/coherence.ipynb diff --git a/docs/docs/integrations/vectorstores/coherence.ipynb b/docs/docs/integrations/vectorstores/coherence.ipynb new file mode 100644 index 00000000000..e367a49ffe7 --- /dev/null +++ b/docs/docs/integrations/vectorstores/coherence.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c92e2cb6-d75b-4321-bf73-69b07d244aa7", + "metadata": {}, + "source": [ + "# Coherence\n", + "\n", + "This notebook covers how to get started with the `Coherence` vector store.\n", + "\n", + ">[Coherence](https://www.oracle.com/java/coherence/) is an in-memory data grid that provides a distributed, fault-tolerant, and scalable platform for managing and accessing data. It is primarily used for high-performance, mission-critical enterprise applications that require low-latency access to large datasets. 
In addition to the commercially available product, Oracle also offers [Coherence CE (Community Edition)](https://github.com/oracle/coherence).\n" + ] + }, + { + "cell_type": "markdown", + "id": "77db1e62-622e-46c1-8f1d-4f32a42f22da", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "To access `Coherence` vector stores you'll need to install the `langchain-coherence` integration package.\n", + "\n", + "```bash\n", + "pip install langchain-coherence\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "id": "a266cffc-0407-4fd3-b905-f42b0770125c", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Before using LangChain's `CoherenceVectorStore`, you must ensure that a Coherence server ([Coherence CE](https://github.com/oracle/coherence) 25.03+ or [Oracle Coherence](https://www.oracle.com/java/coherence/) 14.1.2+) is running.\n", + "\n", + "For local development, we recommend using the Coherence CE container image:\n", + "```bash\n", + "docker run -d -p 1408:1408 ghcr.io/oracle/coherence-ce:25.03.2\n", + "```\n", + "\n", + "### Add Documents and retrieve them:" + ] + }, + { + "cell_type": "raw", + "id": "1e432d99-4750-48b6-a5f9-e8680f9832e9", + "metadata": {}, + "source": [ + "from langchain_core.documents import Document\n", + "from langchain_core.embeddings import Embeddings\n", + "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n", + "\n", + "from coherence import NamedMap, Session\n", + "from langchain_core.vectorstores.coherence_store import CoherenceVectorStore\n", + "\n", + "session: Session = await Session.create()\n", + "try:\n", + " named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n", + " embedding :Embeddings = HuggingFaceEmbeddings(\n", + " model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n", + " # this embedding generates vectors of dimension 384\n", + " cvs :CoherenceVectorStore = await CoherenceVectorStore.create(\n", + " named_map,embedding,384)\n", + " d1 :Document = Document(id=\"1\", 
page_content=\"apple\")\n", + " d2 :Document = Document(id=\"2\", page_content=\"orange\")\n", + " documents = [d1, d2]\n", + " await cvs.aadd_documents(documents)\n", + "\n", + " ids = [doc.id for doc in documents]\n", + " l = await cvs.aget_by_ids(ids)\n", + " assert len(l) == len(ids)\n", + " print(\"====\")\n", + " for e in l:\n", + " print(e)\n", + "finally:\n", + " await session.close()" + ] + }, + { + "cell_type": "markdown", + "id": "eee71605-8d1f-431d-b796-07274b036da3", + "metadata": {}, + "source": [ + "### Delete Documents:" + ] + }, + { + "cell_type": "markdown", + "id": "97e0d922-7208-46be-bade-71f56ce8c586", + "metadata": {}, + "source": [ + "```python\nfrom langchain_core.documents import Document\n", + "from langchain_core.embeddings import Embeddings\n", + "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n", + "\n", + "from coherence import NamedMap, Session\n", + "from langchain_core.vectorstores.coherence_store import CoherenceVectorStore\n", + "\n", + "session: Session = await Session.create()\n", + "try:\n", + " named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n", + " embedding :Embeddings = HuggingFaceEmbeddings(\n", + " model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n", + " # this embedding generates vectors of dimension 384\n", + " cvs :CoherenceVectorStore = await CoherenceVectorStore.create(\n", + " named_map,embedding,384)\n", + " d1 :Document = Document(id=\"1\", page_content=\"apple\")\n", + " d2 :Document = Document(id=\"2\", page_content=\"orange\")\n", + " documents = [d1, d2]\n", + " await cvs.aadd_documents(documents)\n", + "\n", + " ids = [doc.id for doc in documents]\n", + " await cvs.adelete(ids)\n", + "finally:\n", + " await session.close()\n```" + ] + }, + { + "cell_type": "markdown", + "id": "a8c6ad91-ee54-486a-888f-cbfc89be75fd", + "metadata": {}, + "source": [ + "### Similarity Search:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"ffe24b05-7bf1-4eaa-a030-6ac3a0446f29", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.documents import Document\n", + "from langchain_core.embeddings import Embeddings\n", + "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n", + "\n", + "from coherence import NamedMap, Session\n", + "from langchain_core.vectorstores.coherence_store import CoherenceVectorStore\n", + "\n", + "def test_data():\n", + " d1 :Document = Document(id=\"1\", page_content=\"apple\")\n", + " d2 :Document = Document(id=\"2\", page_content=\"orange\")\n", + " d3 :Document = Document(id=\"3\", page_content=\"tiger\")\n", + " d4 :Document = Document(id=\"4\", page_content=\"cat\")\n", + " d5 :Document = Document(id=\"5\", page_content=\"dog\")\n", + " d6 :Document = Document(id=\"6\", page_content=\"fox\")\n", + " d7 :Document = Document(id=\"7\", page_content=\"pear\")\n", + " d8 :Document = Document(id=\"8\", page_content=\"banana\")\n", + " d9 :Document = Document(id=\"9\", page_content=\"plum\")\n", + " d10 :Document = Document(id=\"10\", page_content=\"lion\")\n", + "\n", + " documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]\n", + " return documents\n", + "\n", + "async def test_asimilarity_search():\n", + " documents = test_data()\n", + " session: Session = await Session.create()\n", + " try:\n", + " named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n", + " embedding :Embeddings = HuggingFaceEmbeddings(\n", + " model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n", + " # this embedding generates vectors of dimension 384\n", + " cvs :CoherenceVectorStore = await CoherenceVectorStore.create(\n", + " named_map,embedding,384)\n", + " await cvs.aadd_documents(documents)\n", + " ids = [doc.id for doc in documents]\n", + " l = await cvs.aget_by_ids(ids)\n", + " assert len(l) == 10\n", + "\n", + " result = await cvs.asimilarity_search(\"fruit\")\n", + " assert len(result) == 4\n", + " print(\"====\")\n", + " for e in 
result:\n", + " print(e)\n", + " finally:\n", + " await session.close()" + ] + }, + { + "cell_type": "markdown", + "id": "5411477e-5905-4a96-80d8-b8d2238c4bc4", + "metadata": {}, + "source": [ + "### Similarity Search by Vector:" + ] + }, + { + "cell_type": "raw", + "id": "9713d8bc-db9d-46e7-9fec-2613c2635ad7", + "metadata": {}, + "source": [ + "from langchain_core.documents import Document\n", + "from langchain_core.embeddings import Embeddings\n", + "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n", + "\n", + "from coherence import NamedMap, Session\n", + "from langchain_core.vectorstores.coherence_store import CoherenceVectorStore\n", + "\n", + "def test_data():\n", + " d1 :Document = Document(id=\"1\", page_content=\"apple\")\n", + " d2 :Document = Document(id=\"2\", page_content=\"orange\")\n", + " d3 :Document = Document(id=\"3\", page_content=\"tiger\")\n", + " d4 :Document = Document(id=\"4\", page_content=\"cat\")\n", + " d5 :Document = Document(id=\"5\", page_content=\"dog\")\n", + " d6 :Document = Document(id=\"6\", page_content=\"fox\")\n", + " d7 :Document = Document(id=\"7\", page_content=\"pear\")\n", + " d8 :Document = Document(id=\"8\", page_content=\"banana\")\n", + " d9 :Document = Document(id=\"9\", page_content=\"plum\")\n", + " d10 :Document = Document(id=\"10\", page_content=\"lion\")\n", + "\n", + " documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]\n", + " return documents\n", + "\n", + "async def test_asimilarity_search_by_vector():\n", + " documents = test_data()\n", + " session: Session = await Session.create()\n", + " try:\n", + " named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n", + " embedding :Embeddings = HuggingFaceEmbeddings(\n", + " model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n", + " # this embedding generates vectors of dimension 384\n", + " cvs :CoherenceVectorStore = await CoherenceVectorStore.create(\n", + " named_map,embedding,384)\n", + " await 
cvs.aadd_documents(documents)\n", + " ids = [doc.id for doc in documents]\n", + " l = await cvs.aget_by_ids(ids)\n", + " assert len(l) == 10\n", + "\n", + " vector = cvs.embeddings.embed_query(\"fruit\")\n", + " result = await cvs.asimilarity_search_by_vector(vector)\n", + " assert len(result) == 4\n", + " print(\"====\")\n", + " for e in result:\n", + " print(e)\n", + " finally:\n", + " await session.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "baee76cd-4a9a-4a4e-8a88-f54d9dd98790", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.21" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}