Mirror of https://github.com/hwchase17/langchain.git (synced 2025-07-30 00:04:19 +00:00)

Merge 98fb660e74 into 0e287763cd

Commit: c873c722b7
382	docs/docs/integrations/vectorstores/coherence.ipynb	Normal file

@@ -0,0 +1,382 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6eb5f05c-0488-4563-a81f-d8e6c2c6b7d6",
   "metadata": {},
   "source": [
    "# Coherence\n",
    "\n",
    "This notebook covers how to get started with the `Coherence` vector store.\n",
    "\n",
    ">[Coherence](https://www.oracle.com/java/coherence/) is an in-memory data grid that provides a distributed, fault-tolerant, and scalable platform for managing and accessing data. It is primarily used for high-performance, mission-critical enterprise applications that require low-latency access to large datasets. In addition to the commercially available product, Oracle also offers [Coherence CE (Community Edition)](https://github.com/oracle/coherence).\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59238b02-359e-4ac3-939d-e88735183f28",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "To access `Coherence` vector stores you'll need to install the `langchain-coherence` integration package.\n",
    "\n",
    "```bash\n",
    "pip install langchain-coherence\n",
    "```\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "35761315-a34e-4f03-9246-459c1c36331b",
   "metadata": {},
   "source": [
    "## Initialization"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "592b5db4-807c-4a04-ac64-f0077c44edfa",
   "metadata": {},
   "source": [
    "### Usage\n",
    "\n",
    "Before using LangChain's `CoherenceVectorStore` you must ensure that a Coherence server ([Coherence CE](https://github.com/oracle/coherence) 25.03+ or [Oracle Coherence](https://www.oracle.com/java/coherence/) 14.1.2+) is running.\n",
    "\n",
    "For local development, we recommend using the Coherence CE container image:\n",
    "```bash\n",
    "docker run -d -p 1408:1408 ghcr.io/oracle/coherence-ce:25.03.2\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "86544bf7-8459-40cd-813c-cdbbcde9084f",
   "metadata": {},
   "source": [
    "### Basic Initialization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bdd7da5-c050-47b0-a08c-52d1bd7b6948",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.documents import Document\n",
    "from langchain_core.embeddings import Embeddings\n",
    "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
    "\n",
    "from coherence import NamedMap, Session\n",
    "from langchain_coherence import CoherenceVectorStore\n",
    "\n",
    "session: Session = await Session.create()\n",
    "try:\n",
    "    named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
    "    # this embedding generates vectors of dimension 384\n",
    "    embedding: Embeddings = HuggingFaceEmbeddings(\n",
    "        model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
    "    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(\n",
    "        named_map, embedding)\n",
    "    # other operations on the CoherenceVectorStore can be done here\n",
    "finally:\n",
    "    await session.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5ca1dbd0-4fec-4907-8918-0a438a1b2535",
   "metadata": {},
   "source": [
    "## Manage vector store"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e7171672-4453-4b6f-afe6-071980908a5f",
   "metadata": {},
   "source": [
    "### Add Documents and retrieve them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d52c0187-f5c4-4c69-9052-0b8998945680",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.documents import Document\n",
    "from langchain_core.embeddings import Embeddings\n",
    "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
    "\n",
    "from coherence import NamedMap, Session\n",
    "from langchain_coherence import CoherenceVectorStore\n",
    "\n",
    "session: Session = await Session.create()\n",
    "try:\n",
    "    named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
    "    # this embedding generates vectors of dimension 384\n",
    "    embedding: Embeddings = HuggingFaceEmbeddings(\n",
    "        model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
    "    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(\n",
    "        named_map, embedding)\n",
    "    d1: Document = Document(id=\"1\", page_content=\"apple\")\n",
    "    d2: Document = Document(id=\"2\", page_content=\"orange\")\n",
    "    documents = [d1, d2]\n",
    "    await cvs.aadd_documents(documents)\n",
    "\n",
    "    ids = [doc.id for doc in documents]\n",
    "    docs = await cvs.aget_by_ids(ids)\n",
    "    assert len(docs) == len(ids)\n",
    "    print(\"====\")\n",
    "    for e in docs:\n",
    "        print(e)\n",
    "finally:\n",
    "    await session.close()"
   ]
  },
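  {
   "cell_type": "markdown",
   "id": "afrom-texts-note",
   "metadata": {},
   "source": [
    "### Build a store from raw texts\n",
    "\n",
    "A minimal sketch using this integration's `afrom_texts` helper (defined in `coherence_store.py`): it embeds the texts, wraps them in `Document`s, and adds them in one call. The target `NamedMap` must be passed via the required `cache` keyword argument; `ids` is optional (UUIDs are generated otherwise). The map name `my-map` is just an example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "afrom-texts-code",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
    "\n",
    "from coherence import Session\n",
    "from langchain_coherence import CoherenceVectorStore\n",
    "\n",
    "session = await Session.create()\n",
    "try:\n",
    "    named_map = await session.get_map(\"my-map\")\n",
    "    embedding = HuggingFaceEmbeddings(\n",
    "        model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
    "    # `cache` is required; matching `ids` are optional\n",
    "    cvs = await CoherenceVectorStore.afrom_texts(\n",
    "        [\"apple\", \"orange\"], embedding, cache=named_map, ids=[\"1\", \"2\"])\n",
    "    print(await cvs.aget_by_ids([\"1\", \"2\"]))\n",
    "finally:\n",
    "    await session.close()"
   ]
  },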
  {
   "cell_type": "markdown",
   "id": "f0f7215e-16a4-4fe7-8070-34c385aeeead",
   "metadata": {},
   "source": [
    "### Delete Documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11328dac-633e-4155-8f19-95ef2bfa3d06",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.documents import Document\n",
    "from langchain_core.embeddings import Embeddings\n",
    "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
    "\n",
    "from coherence import NamedMap, Session\n",
    "from langchain_coherence import CoherenceVectorStore\n",
    "\n",
    "session: Session = await Session.create()\n",
    "try:\n",
    "    named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
    "    # this embedding generates vectors of dimension 384\n",
    "    embedding: Embeddings = HuggingFaceEmbeddings(\n",
    "        model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
    "    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(\n",
    "        named_map, embedding)\n",
    "    d1: Document = Document(id=\"1\", page_content=\"apple\")\n",
    "    d2: Document = Document(id=\"2\", page_content=\"orange\")\n",
    "    documents = [d1, d2]\n",
    "    await cvs.aadd_documents(documents)\n",
    "\n",
    "    ids = [doc.id for doc in documents]\n",
    "    await cvs.adelete(ids)\n",
    "finally:\n",
    "    await session.close()"
   ]
  },
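  {
   "cell_type": "markdown",
   "id": "adelete-clear-all-note",
   "metadata": {},
   "source": [
    "Passing no ids clears the entire store: in this implementation (`adelete` in `coherence_store.py`), `ids=None` maps to `cache.clear()`. A minimal sketch:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adelete-clear-all-code",
   "metadata": {},
   "outputs": [],
   "source": [
    "# assumes `cvs` from a live session, created as in the cells above\n",
    "await cvs.adelete()  # ids=None -> every entry in the backing cache is removed"
   ]
  },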
  {
   "cell_type": "markdown",
   "id": "8241b585-64ae-447a-b1a7-860d3c51f823",
   "metadata": {},
   "source": [
    "## Query vector store"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a8c6ad91-ee54-486a-888f-cbfc89be75fd",
   "metadata": {},
   "source": [
    "### Similarity Search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ffe24b05-7bf1-4eaa-a030-6ac3a0446f29",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.documents import Document\n",
    "from langchain_core.embeddings import Embeddings\n",
    "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
    "\n",
    "from coherence import NamedMap, Session\n",
    "from langchain_coherence import CoherenceVectorStore\n",
    "\n",
    "\n",
    "def test_data():\n",
    "    d1: Document = Document(id=\"1\", page_content=\"apple\")\n",
    "    d2: Document = Document(id=\"2\", page_content=\"orange\")\n",
    "    d3: Document = Document(id=\"3\", page_content=\"tiger\")\n",
    "    d4: Document = Document(id=\"4\", page_content=\"cat\")\n",
    "    d5: Document = Document(id=\"5\", page_content=\"dog\")\n",
    "    d6: Document = Document(id=\"6\", page_content=\"fox\")\n",
    "    d7: Document = Document(id=\"7\", page_content=\"pear\")\n",
    "    d8: Document = Document(id=\"8\", page_content=\"banana\")\n",
    "    d9: Document = Document(id=\"9\", page_content=\"plum\")\n",
    "    d10: Document = Document(id=\"10\", page_content=\"lion\")\n",
    "\n",
    "    documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]\n",
    "    return documents\n",
    "\n",
    "\n",
    "async def test_asimilarity_search():\n",
    "    documents = test_data()\n",
    "    session: Session = await Session.create()\n",
    "    try:\n",
    "        named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
    "        # this embedding generates vectors of dimension 384\n",
    "        embedding: Embeddings = HuggingFaceEmbeddings(\n",
    "            model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
    "        cvs: CoherenceVectorStore = await CoherenceVectorStore.create(\n",
    "            named_map, embedding)\n",
    "        await cvs.aadd_documents(documents)\n",
    "        ids = [doc.id for doc in documents]\n",
    "        docs = await cvs.aget_by_ids(ids)\n",
    "        assert len(docs) == 10\n",
    "\n",
    "        result = await cvs.asimilarity_search(\"fruit\")\n",
    "        assert len(result) == 4\n",
    "        print(\"====\")\n",
    "        for e in result:\n",
    "            print(e)\n",
    "    finally:\n",
    "        await session.close()\n",
    "\n",
    "\n",
    "await test_asimilarity_search()"
   ]
  },
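  {
   "cell_type": "markdown",
   "id": "with-score-note",
   "metadata": {},
   "source": [
    "### Similarity search with scores\n",
    "\n",
    "A minimal sketch of `asimilarity_search_with_score`, which returns `(Document, distance)` tuples; with the default `CosineDistance` algorithm a smaller distance means a closer match. It reuses `test_data()` and the imports from the previous cell.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "with-score-code",
   "metadata": {},
   "outputs": [],
   "source": [
    "session = await Session.create()\n",
    "try:\n",
    "    named_map = await session.get_map(\"my-map\")\n",
    "    embedding = HuggingFaceEmbeddings(\n",
    "        model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
    "    cvs = await CoherenceVectorStore.create(named_map, embedding)\n",
    "    await cvs.aadd_documents(test_data())\n",
    "    for doc, distance in await cvs.asimilarity_search_with_score(\"fruit\", k=2):\n",
    "        print(doc.page_content, distance)\n",
    "finally:\n",
    "    await session.close()"
   ]
  },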
  {
   "cell_type": "markdown",
   "id": "6862ae65-a3d6-46e4-9e85-f3f64f2add5c",
   "metadata": {},
   "source": [
    "## Usage for retrieval-augmented generation"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5411477e-5905-4a96-80d8-b8d2238c4bc4",
   "metadata": {},
   "source": [
    "### Similarity search by vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dbb07be8-b13e-4bd5-a9ea-94603ff2a6e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.documents import Document\n",
    "from langchain_core.embeddings import Embeddings\n",
    "from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
    "\n",
    "from coherence import NamedMap, Session\n",
    "from langchain_coherence import CoherenceVectorStore\n",
    "\n",
    "\n",
    "def test_data():\n",
    "    d1: Document = Document(id=\"1\", page_content=\"apple\")\n",
    "    d2: Document = Document(id=\"2\", page_content=\"orange\")\n",
    "    d3: Document = Document(id=\"3\", page_content=\"tiger\")\n",
    "    d4: Document = Document(id=\"4\", page_content=\"cat\")\n",
    "    d5: Document = Document(id=\"5\", page_content=\"dog\")\n",
    "    d6: Document = Document(id=\"6\", page_content=\"fox\")\n",
    "    d7: Document = Document(id=\"7\", page_content=\"pear\")\n",
    "    d8: Document = Document(id=\"8\", page_content=\"banana\")\n",
    "    d9: Document = Document(id=\"9\", page_content=\"plum\")\n",
    "    d10: Document = Document(id=\"10\", page_content=\"lion\")\n",
    "\n",
    "    documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]\n",
    "    return documents\n",
    "\n",
    "\n",
    "async def test_asimilarity_search_by_vector():\n",
    "    documents = test_data()\n",
    "    session: Session = await Session.create()\n",
    "    try:\n",
    "        named_map: NamedMap[str, Document] = await session.get_map(\"my-map\")\n",
    "        # this embedding generates vectors of dimension 384\n",
    "        embedding: Embeddings = HuggingFaceEmbeddings(\n",
    "            model_name=\"sentence-transformers/all-MiniLM-l6-v2\")\n",
    "        cvs: CoherenceVectorStore = await CoherenceVectorStore.create(\n",
    "            named_map, embedding)\n",
    "        await cvs.aadd_documents(documents)\n",
    "        ids = [doc.id for doc in documents]\n",
    "        docs = await cvs.aget_by_ids(ids)\n",
    "        assert len(docs) == 10\n",
    "\n",
    "        vector = cvs.embeddings.embed_query(\"fruit\")\n",
    "        result = await cvs.asimilarity_search_by_vector(vector)\n",
    "        assert len(result) == 4\n",
    "        print(\"====\")\n",
    "        for e in result:\n",
    "            print(e)\n",
    "    finally:\n",
    "        await session.close()\n",
    "\n",
    "\n",
    "await test_asimilarity_search_by_vector()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "803d5c9c-0df5-4a27-b179-b97c0ca0c27a",
   "metadata": {},
   "source": [
    "## API reference"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "cell_metadata_filter": "-all",
   "main_language": "python",
   "notebook_metadata_filter": "-all"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.21"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
36	libs/partners/coherence/.gitignore	vendored	Normal file

@@ -0,0 +1,36 @@
# Python
__pycache__/
*.py[cod]
*.egg
*.egg-info/
dist/
build/
.eggs/

# Virtual environments
.venv/
.env/

# uv cache
.uv/

# Testing
htmlcov/
.cache/
.coverage
coverage.xml

# IDE
.idea/
.vscode/

# Logs
*.log

# OCA
.oca

# OS
.DS_Store
Thumbs.db
21	libs/partners/coherence/LICENSE	Normal file

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 LangChain, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
84	libs/partners/coherence/Makefile	Normal file

@@ -0,0 +1,84 @@
# Makefile for LangChain-Coherence Integration
.PHONY: install update-dev update-core lock sync lint format fix check test clean build upload-pypi publish help

# Paths to common tools (adjust if using .uv or other env)
PYTHON := .venv/bin/python
PIP := .venv/bin/pip
RUFF := .venv/bin/ruff
PYTEST := .venv/bin/pytest
MYPY := .venv/bin/mypy

PACKAGE_NAME=langchain-coherence
DIST_DIR=dist

install:
	@echo "🔧 Installing all dependencies..."
	uv venv
	uv pip install -e .[lint,typing,test,docs,publish]

update-dev:
	@echo "🔄 Updating development dependencies..."
	uv pip install -e .[lint,typing,test,docs] --upgrade

update-core:
	@echo "🔄 Updating core dependencies..."
	uv pip install --upgrade langchain-core coherence-client

lock:
	@echo "🔐 Locking all dependencies to uv.lock..."
	uv lock

sync:
	@echo "📦 Syncing dependencies from uv.lock..."
	uv sync

lint: check

check:
	@echo "🔍 Running linter and type checker..."
	$(RUFF) check langchain_coherence tests
	$(MYPY) --explicit-package-bases langchain_coherence

format:
	@echo "🎨 Formatting code with Ruff..."
	$(RUFF) format langchain_coherence tests

fix:
	@echo "🔧 Fixing lint issues..."
	$(MAKE) format
	$(RUFF) check langchain_coherence tests --fix

test:
	@echo "🧪 Running tests..."
	$(PYTEST)

clean:
	@echo "🧹 Cleaning build/test artifacts..."
	rm -rf .pytest_cache .mypy_cache .ruff_cache __pycache__ *.egg-info dist build

build:
	@echo "🧱 Building distribution using local virtualenv"
	$(PYTHON) -m build --no-isolation

upload-pypi:
	@echo "🚀 Uploading to PyPI"
	$(PYTHON) -m twine upload dist/*

publish: build upload-pypi

help:
	@echo "🛠 Available Make targets:"
	@echo "  install      - Install all dependencies into .venv"
	@echo "  update-dev   - Upgrade dev dependencies (ruff, pytest, etc.)"
	@echo "  update-core  - Upgrade core runtime deps (langchain-core, coherence-client)"
	@echo "  lock         - Generate uv.lock with pinned versions"
	@echo "  sync         - Install from uv.lock (repeatable builds)"
	@echo "  lint         - Run linter and mypy"
	@echo "  fix          - Autoformat and fix issues"
	@echo "  test         - Run all tests"
	@echo "  build        - Build distribution using local virtualenv"
	@echo "  upload-pypi  - Upload to PyPI"
	@echo "  publish      - Run build, then upload-pypi"
	@echo "  clean        - Remove temp and build files"
103	libs/partners/coherence/README.md	Normal file

@@ -0,0 +1,103 @@
# LangChain Coherence Integration

This package integrates Oracle Coherence as a vector store in LangChain.

## Installation

```bash
pip install langchain-coherence
```

## Usage

Before using LangChain's `CoherenceVectorStore` you must ensure that a Coherence server ([Coherence CE](https://github.com/oracle/coherence) 25.03+ or [Oracle Coherence](https://www.oracle.com/java/coherence/) 14.1.2+) is running.

For local development, we recommend using the Coherence CE container image:

```bash
docker run -d -p 1408:1408 ghcr.io/oracle/coherence-ce:25.03.2
```

### Adding and retrieving Documents

```python
import asyncio

from langchain_coherence import CoherenceVectorStore
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from coherence import NamedMap, Session

async def do_run():
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        # this embedding generates vectors of dimension 384
        embedding: Embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2")
        cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
            named_map, embedding)
        d1: Document = Document(id="1", page_content="apple")
        d2: Document = Document(id="2", page_content="orange")
        documents = [d1, d2]
        await cvs.aadd_documents(documents)

        ids = [doc.id for doc in documents]
        docs = await cvs.aget_by_ids(ids)
        assert len(docs) == len(ids)
        print("====")
        for e in docs:
            print(e)
    finally:
        await session.close()

asyncio.run(do_run())
```
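### Indexing the stored vectors

The store also exposes `add_index`/`remove_index` helpers that build (or drop) an HNSW index over the embedding field of the backing cache. A minimal sketch, assuming `cvs` was created as in the example above (384 is the output dimension of the `all-MiniLM-l6-v2` embedding):

```python
# inside an async function, after `cvs` has been created
await cvs.add_index(384)   # build an HNSW index sized to the embedding dimension
# ...run similarity searches against the indexed vectors...
await cvs.remove_index()   # drop the index when it is no longer needed
```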
### SimilaritySearch on Documents

```python
import asyncio

from langchain_coherence import CoherenceVectorStore
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from coherence import NamedMap, Session

def test_data():
    d1: Document = Document(id="1", page_content="apple")
    d2: Document = Document(id="2", page_content="orange")
    d3: Document = Document(id="3", page_content="tiger")
    d4: Document = Document(id="4", page_content="cat")
    d5: Document = Document(id="5", page_content="dog")
    d6: Document = Document(id="6", page_content="fox")
    d7: Document = Document(id="7", page_content="pear")
    d8: Document = Document(id="8", page_content="banana")
    d9: Document = Document(id="9", page_content="plum")
    d10: Document = Document(id="10", page_content="lion")

    documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
    return documents

async def test_asimilarity_search():
    documents = test_data()
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        # this embedding generates vectors of dimension 384
        embedding: Embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2")
        cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
            named_map, embedding)
        await cvs.aadd_documents(documents)
        ids = [doc.id for doc in documents]
        docs = await cvs.aget_by_ids(ids)
        assert len(docs) == 10

        result = await cvs.asimilarity_search("fruit")
        assert len(result) == 4
        print("====")
        for e in result:
            print(e)
    finally:
        await session.close()

asyncio.run(test_asimilarity_search())
```
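### Similarity search with scores

`asimilarity_search_with_score` returns `(Document, distance)` pairs instead of bare documents; with the default `CosineDistance` algorithm, a smaller distance means a closer match. A minimal sketch, reusing `test_data()` and the setup from the example above:

```python
# inside an async function, after `cvs` has been created and populated
results = await cvs.asimilarity_search_with_score("fruit", k=2)
for doc, distance in results:
    print(doc.page_content, distance)
```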
9	libs/partners/coherence/langchain_coherence/__init__.py	Normal file

@@ -0,0 +1,9 @@
"""Public interface for the LangChain Coherence integration."""

from __future__ import annotations

__version__ = "0.0.1"

from langchain_coherence.coherence_store import CoherenceVectorStore

__all__ = ["CoherenceVectorStore"]
600	libs/partners/coherence/langchain_coherence/coherence_store.py	Normal file

@@ -0,0 +1,600 @@
"""Coherence vector store."""

from __future__ import annotations

import asyncio
import json
import uuid
from typing import (
    TYPE_CHECKING,
    Any,
    Final,
    Optional,
    cast,
)

from typing_extensions import override

if TYPE_CHECKING:
    from collections.abc import Iterator, Sequence

import jsonpickle  # type: ignore[import-untyped]
from coherence import (  # type: ignore[import-untyped]
    Extractors,
    Filters,
    NamedCache,
)
from coherence.ai import (  # type: ignore[import-untyped]
    CosineDistance,
    DistanceAlgorithm,
    FloatVector,
    HnswIndex,
    QueryResult,
    SimilaritySearch,
    Vector,
    Vectors,
)
from coherence.extractor import (  # type: ignore[import-untyped]
    ValueExtractor,
)
from coherence.filter import (  # type: ignore[import-untyped]
    Filter,
)
from coherence.serialization import (  # type: ignore[import-untyped]
    JSONSerializer,
    SerializerRegistry,
)
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore


class CoherenceVectorStore(VectorStore):
    """Coherence VectorStore implementation.

    Uses a Coherence NamedCache for similarity search.

    Setup:
        Install ``langchain-coherence``.

        .. code-block:: bash

            pip install -U langchain-coherence

    Add Documents and retrieve them:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            session: Session = await Session.create()
            try:
                named_map: NamedMap[str, Document] = await session.get_map("my-map")
                # this embedding generates vectors of dimension 384
                embedding: Embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-l6-v2")
                cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                    named_map, embedding)
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                documents = [d1, d2]
                await cvs.aadd_documents(documents)

                ids = [doc.id for doc in documents]
                docs = await cvs.aget_by_ids(ids)
                assert len(docs) == len(ids)
                print("====")
                for e in docs:
                    print(e)
            finally:
                await session.close()

    Delete Documents:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            session: Session = await Session.create()
            try:
                named_map: NamedMap[str, Document] = await session.get_map("my-map")
                # this embedding generates vectors of dimension 384
                embedding: Embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-l6-v2")
                cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                    named_map, embedding)
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                documents = [d1, d2]
                await cvs.aadd_documents(documents)

                ids = [doc.id for doc in documents]
                await cvs.adelete(ids)
            finally:
                await session.close()

    Similarity Search:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            def test_data():
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                d3: Document = Document(id="3", page_content="tiger")
                d4: Document = Document(id="4", page_content="cat")
                d5: Document = Document(id="5", page_content="dog")
                d6: Document = Document(id="6", page_content="fox")
                d7: Document = Document(id="7", page_content="pear")
                d8: Document = Document(id="8", page_content="banana")
                d9: Document = Document(id="9", page_content="plum")
                d10: Document = Document(id="10", page_content="lion")

                documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
                return documents

            async def test_asimilarity_search():
                documents = test_data()
                session: Session = await Session.create()
                try:
                    named_map: NamedMap[str, Document] = await session.get_map("my-map")
                    # this embedding generates vectors of dimension 384
                    embedding: Embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-l6-v2")
                    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                        named_map, embedding)
                    await cvs.aadd_documents(documents)
                    ids = [doc.id for doc in documents]
                    docs = await cvs.aget_by_ids(ids)
                    assert len(docs) == 10

                    result = await cvs.asimilarity_search("fruit")
                    assert len(result) == 4
                    print("====")
                    for e in result:
                        print(e)
                finally:
                    await session.close()

    Similarity Search by vector:
        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.embeddings import Embeddings
            from langchain_huggingface.embeddings import HuggingFaceEmbeddings

            from coherence import NamedMap, Session
            from langchain_coherence import CoherenceVectorStore

            def test_data():
                d1: Document = Document(id="1", page_content="apple")
                d2: Document = Document(id="2", page_content="orange")
                d3: Document = Document(id="3", page_content="tiger")
                d4: Document = Document(id="4", page_content="cat")
                d5: Document = Document(id="5", page_content="dog")
                d6: Document = Document(id="6", page_content="fox")
                d7: Document = Document(id="7", page_content="pear")
                d8: Document = Document(id="8", page_content="banana")
                d9: Document = Document(id="9", page_content="plum")
                d10: Document = Document(id="10", page_content="lion")

                documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
                return documents

            async def test_asimilarity_search_by_vector():
                documents = test_data()
                session: Session = await Session.create()
                try:
                    named_map: NamedMap[str, Document] = await session.get_map("my-map")
                    # this embedding generates vectors of dimension 384
                    embedding: Embeddings = HuggingFaceEmbeddings(
                        model_name="sentence-transformers/all-MiniLM-l6-v2")
                    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
                        named_map, embedding)
                    await cvs.aadd_documents(documents)
                    ids = [doc.id for doc in documents]
                    docs = await cvs.aget_by_ids(ids)
                    assert len(docs) == 10

                    vector = cvs.embeddings.embed_query("fruit")
                    result = await cvs.asimilarity_search_by_vector(vector)
                    assert len(result) == 4
                    print("====")
                    for e in result:
                        print(e)
                finally:
                    await session.close()

    """

    VECTOR_FIELD: Final[str] = "__dict__.metadata.vector"
    """The name of the field containing the vector embeddings."""

    VECTOR_EXTRACTOR: Final[ValueExtractor] = Extractors.extract(VECTOR_FIELD)
    """The ValueExtractor to extract the embeddings vector."""

    def __init__(self, coherence_cache: NamedCache, embedding: Embeddings):
        """Initialize with a Coherence cache and an embedding function.

        Args:
            coherence_cache: Coherence NamedCache to use.
            embedding: embedding function to use.
        """
        self.cache = coherence_cache
        self.embedding = embedding

    @staticmethod
    async def create(
        coherence_cache: NamedCache,
        embedding: Embeddings,
    ) -> CoherenceVectorStore:
        """Create an instance of CoherenceVectorStore.

        Args:
            coherence_cache: Coherence NamedCache to use.
            embedding: embedding function to use.
        """
        coh_store: CoherenceVectorStore = CoherenceVectorStore(
            coherence_cache, embedding
        )
        return coh_store

    async def add_index(self, dimensions: int) -> None:
        """Create an HNSW index on the Coherence cache over VECTOR_FIELD.

        Args:
            dimensions: size of the vector created by the embedding function.
        """
        await self.cache.add_index(
            HnswIndex(CoherenceVectorStore.VECTOR_EXTRACTOR, dimensions)
        )

    async def remove_index(self) -> None:
        """Remove the index on the Coherence cache over VECTOR_FIELD."""
        await self.cache.remove_index(CoherenceVectorStore.VECTOR_EXTRACTOR)

    @property
    @override
    def embeddings(self) -> Embeddings:
        return self.embedding

    @override
    def add_documents(
        self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any
    ) -> list[str]:
        raise NotImplementedError

    @override
    async def aadd_documents(
        self, documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any
    ) -> list[str]:
        """Async run more documents through the embeddings and add to the vectorstore.

        Args:
            documents: Documents to add to the vectorstore.
            ids: Optional list of IDs of the documents.
            kwargs: Additional keyword arguments.

        Returns:
            List of IDs of the added texts.

        Raises:
            ValueError: If the number of IDs does not match the number of documents.
        """
        texts = [doc.page_content for doc in documents]
        vectors = await self.embedding.aembed_documents(texts)

        # Apply normalization and wrap in FloatVector
        float_vectors = [FloatVector(Vectors.normalize(vector)) for vector in vectors]

        if ids and len(ids) != len(texts):
            msg = (
                f"ids must be the same length as texts. "
                f"Got {len(ids)} ids and {len(texts)} texts."
            )
            raise ValueError(msg)

        id_iterator: Iterator[Optional[str]] = (
            iter(ids) if ids else iter(doc.id for doc in documents)
        )
        ids_: list[str] = []

        doc_map: dict[str, Document] = {}
        for doc, vector in zip(documents, float_vectors):
            doc_id = next(id_iterator)
            doc_id_ = doc_id or str(uuid.uuid4())
            ids_.append(doc_id_)
            doc.metadata["vector"] = vector
            doc_map[doc_id_] = doc

        await self.cache.put_all(doc_map)

        return ids_

    @override
    def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
        raise NotImplementedError

    @override
    async def aget_by_ids(self, ids: Sequence[str], /) -> list[Document]:
        """Get documents by their ids.

        Args:
            ids: The ids of the documents to get.

        Returns:
            A list of Document objects.
        """
        return [e.value async for e in await self.cache.get_all(set(ids))]

    @override
    async def adelete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
        """Async delete by Document ID or other criteria.

        Args:
            ids: List of ids to delete. If None, delete all. Default is None.
            **kwargs: Other keyword arguments that subclasses might use.
        """
        if ids is None:
            await self.cache.clear()
        else:
            # Efficient parallel delete
            await asyncio.gather(*(self.cache.remove(i) for i in ids))

    def _parse_coherence_kwargs(
        self, **kwargs: Any
    ) -> tuple[DistanceAlgorithm, Filter, bool]:
        allowed_keys = {"algorithm", "filter", "brute_force"}
        extra_keys = set(kwargs) - allowed_keys
        if extra_keys:
            # Silently drop unsupported keyword arguments
            for key in extra_keys:
                kwargs.pop(key)

        algorithm: DistanceAlgorithm = kwargs.get("algorithm", CosineDistance())
        filter_: Filter = kwargs.get("filter", Filters.always())
        brute_force: bool = kwargs.get("brute_force", False)

        return (algorithm, filter_, brute_force)

    @override
    async def asimilarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[Document]:
        """Async method returning the list of docs most similar to the query.

        Args:
            query: Input text.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:
                - algorithm: DistanceAlgorithm to use (default: CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter to use to limit the set of entries to search
                  (default: Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available
                  indexes (default: False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of Documents most similar to the query.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)

        query_vector = self.embedding.embed_query(query)
        float_query_vector = FloatVector(Vectors.normalize(query_vector))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        query_results = await self.cache.aggregate(search)

        return [e.value for e in query_results]

    @override
    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[Document]:
        raise NotImplementedError

    @override
    async def asimilarity_search_by_vector(
        self, embedding: list[float], k: int = 4, **kwargs: Any
    ) -> list[Document]:
        """Async method returning the list of docs most similar to a given vector.

        Args:
            embedding: Input vector.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:
                - algorithm: DistanceAlgorithm to use (default: CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter to use to limit the set of entries to search
                  (default: Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available
                  indexes (default: False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of Documents most similar to the query.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)
        float_query_vector = FloatVector(Vectors.normalize(embedding))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        # the filter is already carried by the SimilaritySearch aggregator
        query_results = await self.cache.aggregate(search)

        return [e.value for e in query_results]

    @override
    def similarity_search_by_vector(
        self, embedding: list[float], k: int = 4, **kwargs: Any
    ) -> list[Document]:
        raise NotImplementedError

    @override
    async def asimilarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[tuple[Document, float]]:
        """Async method returning (Document, score) tuples most similar to the query.

        Args:
            query: Input text.
            k: Number of Documents to return. Defaults to 4.
            **kwargs: Optional arguments:
                - algorithm: DistanceAlgorithm to use (default: CosineDistance).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#cosinedistance
                - filter: filter to use to limit the set of entries to search
                  (default: Filters.always()).
                  https://oracle.github.io/coherence-py-client/api_reference/filter.html
                - brute_force: force brute-force search, ignoring any available
                  indexes (default: False).
                  https://oracle.github.io/coherence-py-client/api_reference/ai.html#similaritysearch

        Returns:
            List of (Document, score) tuples most similar to the query.
        """
        algorithm, filter_, brute_force = self._parse_coherence_kwargs(**kwargs)
        query_vector = self.embedding.embed_query(query)
        float_query_vector = FloatVector(Vectors.normalize(query_vector))

        search: SimilaritySearch = SimilaritySearch(
            CoherenceVectorStore.VECTOR_EXTRACTOR,
            float_query_vector,
            k,
            algorithm=algorithm,
            filter=filter_,
            brute_force=brute_force,
        )
        query_results: list[QueryResult] = await self.cache.aggregate(search)

        return [(e.value, e.distance) for e in query_results]

    @override
    def similarity_search_with_score(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[tuple[Document, float]]:
        raise NotImplementedError

    @classmethod
    @override
    def from_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: Optional[list[dict[Any, Any]]] = None,
        **kwargs: Any,
    ) -> CoherenceVectorStore:
        msg = "Use `afrom_texts()` instead; sync context is not supported."
        raise NotImplementedError(msg)

    @classmethod
    @override
    async def afrom_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: Optional[list[dict[str, Any]]] = None,
        **kwargs: Any,
    ) -> CoherenceVectorStore:
        """Asynchronously initialize the CoherenceVectorStore from texts and embeddings.

        Args:
            texts: List of input text strings.
            embedding: Embedding function to use.
            metadatas: Optional list of metadata dicts corresponding to each text.
            kwargs: Additional keyword arguments.
                - cache: Required Coherence NamedCache[str, Document] instance.
                - ids: Optional list of document IDs.

        Returns:
            CoherenceVectorStore: An initialized and populated vector store.

        Raises:
            ValueError: If `cache` is not provided.
        """
        # Extract and validate the required Coherence cache
        cache = kwargs.get("cache")
        if cache is None:
            msg = "Missing required 'cache' parameter in afrom_texts"
            raise ValueError(msg)

        # Optionally use caller-supplied document IDs
        ids: Optional[list[str]] = kwargs.get("ids")
        if ids is not None and len(ids) != len(texts):
            msg = "Length of 'ids' must match length of 'texts'"
            raise ValueError(msg)

        # Create store instance
        store = await cls.create(cache, embedding)

        # Construct Document objects
        documents = []
        for i, text in enumerate(texts):
            metadata = metadatas[i] if metadatas and i < len(metadatas) else {}
            doc_id = ids[i] if ids else str(uuid.uuid4())
            documents.append(Document(page_content=text, metadata=metadata, id=doc_id))

        # Add documents to vector store
        await store.aadd_documents(documents)
        return store


@jsonpickle.handlers.register(Document)
class _LangChainDocumentHandler(jsonpickle.handlers.BaseHandler):  # type: ignore[misc]
    def flatten(self, obj: object, data: dict[str, Any]) -> dict[str, Any]:
        """Flatten object to a dictionary for handler to use."""
        ser = SerializerRegistry.serializer(JSONSerializer.SER_FORMAT)
        json_ser = cast("JSONSerializer", ser)
        o = cast("Document", obj)
        vector = o.metadata["vector"]
        if vector is not None and isinstance(vector, Vector):
            s = json_ser.serialize(vector)
            d = json.loads(s[1:])
            o.metadata["vector"] = json_ser.flatten_to_dict(d)

        data["__dict__"] = obj.__dict__
        return data

    def restore(self, obj: dict[str, Any]) -> Document:
        """Convert dictionary to an object for handler to use."""
        ser = SerializerRegistry.serializer(JSONSerializer.SER_FORMAT)
        json_ser = cast("JSONSerializer", ser)
        d = Document(page_content="")
        d.__dict__ = obj["__dict__"]
        vector = d.metadata["vector"]
        if vector is not None and isinstance(vector, dict):
            d.metadata["vector"] = json_ser.restore_to_object(vector)
        return d
95	libs/partners/coherence/pyproject.toml	Normal file

@@ -0,0 +1,95 @@
[project]
name = "langchain-coherence"
version = "0.0.1"
description = "LangChain integration for Oracle Coherence as a vector store."
authors = [{ name = "Your Name", email = "you@example.com" }]
license = {text = "MIT"}
readme = "README.md"
dependencies = [
    "langchain-core>=0.1.20",
    "coherence-client>=2.0.4",
]
requires-python = ">=3.9"

[project.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/coherence"
repository = "https://github.com/langchain-ai/langchain"

[project.optional-dependencies]
lint = [
    "ruff<0.12.0,>=0.11.2",
]
typing = [
    "mypy<1.16,>=1.15",
]
test = [
    "pytest<9,>=8",
    "pytest-asyncio<1.0.0,>=0.21.1",
    "langchain_huggingface",
    "sentence_transformers",
]
docs = [
    "jupytext>=1.16",
    "nbdoc>=0.0.29",
]
publish = [
    "build",
    "twine",
]

[tool.mypy]
strict = true
disallow_untyped_defs = true

[tool.ruff]
target-version = "py39"

[tool.ruff.lint]
select = ["ALL"]
ignore = [
    "C90",     # McCabe complexity
    "COM812",  # Messes with the formatter
    "FA100",   # Can't activate since we exclude UP007 for now
    "FIX002",  # Line contains TODO
    "ISC001",  # Messes with the formatter
    "PERF203", # Rarely useful
    "PLR09",   # Too many something (arg, statements, etc)
    "RUF012",  # Doesn't play well with Pydantic
    "TC001",   # Doesn't play well with Pydantic
    "TC002",   # Doesn't play well with Pydantic
    "TC003",   # Doesn't play well with Pydantic
    "TD002",   # Missing author in TODO
    "TD003",   # Missing issue link in TODO
    "UP007",   # Doesn't play well with Pydantic in Python 3.9

    # TODO rules
    "ANN401",
    "BLE",
    "ERA",
    "PLR2004",
]
flake8-annotations.allow-star-arg-any = true
flake8-annotations.mypy-init-return = true
pydocstyle.convention = "google"
pydocstyle.ignore-var-parameters = true

[tool.ruff.lint.per-file-ignores]
"tests/**" = [
    "D",       # docstring rules
    "ANN",     # missing type annotations
    "T201",    # use of `print`
    "S101",    # use of `assert`
    "E741",    # ambiguous variable name like `l`
    "RET504",  # unnecessary assignment before return
    "I001",    # import sorting
    "UP035",   # import from collections.abc instead of typing
]

[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "function"
testpaths = ["tests"]
filterwarnings = [
    "ignore::UserWarning:pkg_resources",
]
markers = [
    "compile: marker used to test compilation-only tests",
]
0
libs/partners/coherence/tests/__init__.py
Normal file
@@ -0,0 +1,153 @@
import inspect
from typing import AsyncGenerator

import pytest
import pytest_asyncio
from coherence import NamedCache, Session
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

from langchain_coherence import CoherenceVectorStore


@pytest_asyncio.fixture
async def store() -> AsyncGenerator[CoherenceVectorStore, None]:
    session: Session = await Session.create()
    named_cache: NamedCache[str, Document] = await session.get_cache("my-map")
    embedding: Embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
        named_cache, embedding
    )
    yield cvs
    await cvs.cache.destroy()
    await session.close()


def get_test_data():
    d1: Document = Document(id="1", page_content="apple")
    d2: Document = Document(id="2", page_content="orange")
    d3: Document = Document(id="3", page_content="tiger")
    d4: Document = Document(id="4", page_content="cat")
    d5: Document = Document(id="5", page_content="dog")
    d6: Document = Document(id="6", page_content="fox")
    d7: Document = Document(id="7", page_content="pear")
    d8: Document = Document(id="8", page_content="banana")
    d9: Document = Document(id="9", page_content="plum")
    d10: Document = Document(id="10", page_content="lion")

    documents = [d1, d2, d3, d4, d5, d6, d7, d8, d9, d10]
    return documents


@pytest.mark.asyncio
async def test_aget_by_id(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10
    print("====")
    for e in docs:
        print(e)


@pytest.mark.asyncio
async def test_adelete(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10
    await store.adelete(["1", "2"])
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 8
    await store.adelete()
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 0


@pytest.mark.asyncio
async def test_asimilarity_search(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    result = await store.asimilarity_search("fruit")
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)


@pytest.mark.asyncio
async def test_asimilarity_search_by_vector(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    vector = store.embeddings.embed_query("animal")
    result = await store.asimilarity_search_by_vector(vector)
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)


@pytest.mark.asyncio
async def test_asimilarity_search_with_score(store: CoherenceVectorStore):
    print()
    print(f"=======: {inspect.currentframe().f_code.co_name}")
    documents = get_test_data()
    await store.aadd_documents(documents)
    ids = [doc.id for doc in documents]
    docs = await store.aget_by_ids(ids)
    assert len(docs) == 10

    result = await store.asimilarity_search_with_score("fruit")
    assert len(result) == 4
    print("====")
    for e in result:
        print(e)


@pytest.mark.asyncio
async def test_afrom_texts():
    session = await Session.create()
    try:
        cache = await session.get_cache("test-map-async")
        embedding = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        texts = ["apple", "banana"]
        metadatas = [{"cat": "fruit"}, {"cat": "fruit"}]
        ids = ["id1", "id2"]

        store = await CoherenceVectorStore.afrom_texts(
            texts=texts,
            embedding=embedding,
            cache=cache,
            metadatas=metadatas,
            ids=ids,
        )

        results = await store.aget_by_ids(ids)
        assert len(results) == 2
    finally:
        await session.close()
@@ -0,0 +1,6 @@
import pytest


@pytest.mark.compile
def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
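The `compile` marker registered in `pyproject.toml` lets CI import and collect the test modules without executing anything that needs a live Coherence server. A hedged sketch of selecting just those tests programmatically (equivalent to `pytest -m compile` on the command line):

```python
import pytest

# Collect and run only tests carrying the `compile` marker.
raise SystemExit(pytest.main(["-m", "compile"]))
```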
3986
libs/partners/coherence/uv.lock
Normal file
File diff suppressed because it is too large