mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 20:58:25 +00:00
community: VectorStore Infinispan, adding autoconfiguration (#18967)
**Description**: this PR enables VectorStore autoconfiguration for Infinispan: if the metadata values are only of basic types, the protobuf configuration is generated automatically for the user.
This commit is contained in:
parent
6f544a6a25
commit
9b2f9ee952
@ -37,9 +37,21 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"To run this demo we need a running Infinispan instance without authentication and a data file.\n",
|
"To run this demo we need a running Infinispan instance without authentication and a data file.\n",
|
||||||
"In the next three cells we're going to:\n",
|
"In the next three cells we're going to:\n",
|
||||||
|
"- download the data file\n",
|
||||||
"- create the configuration\n",
|
"- create the configuration\n",
|
||||||
"- run Infinispan in docker\n",
|
"- run Infinispan in docker"
|
||||||
"- download the data file"
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "9678d5ce-894c-4e28-bf68-20d45507122f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%bash\n",
|
||||||
|
"#get an archive of news\n",
|
||||||
|
"wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -76,18 +88,6 @@
|
|||||||
"' > infinispan-noauth.yaml"
|
"' > infinispan-noauth.yaml"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "9678d5ce-894c-4e28-bf68-20d45507122f",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%bash\n",
|
|
||||||
"#get an archive of news\n",
|
|
||||||
"wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -95,7 +95,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"!docker run -d --name infinispanvs-demo -v $(pwd):/user-config -p 11222:11222 infinispan/server:15.0.0.Dev09 -c /user-config/infinispan-noauth.yaml "
|
"!docker rm --force infinispanvs-demo\n",
|
||||||
|
"!docker run -d --name infinispanvs-demo -v $(pwd):/user-config -p 11222:11222 infinispan/server:15.0 -c /user-config/infinispan-noauth.yaml"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -133,80 +134,8 @@
|
|||||||
"## Setup Infinispan cache\n",
|
"## Setup Infinispan cache\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Infinispan is a very flexible key-value store, it can store raw bits as well as complex data type.\n",
|
"Infinispan is a very flexible key-value store, it can store raw bits as well as complex data type.\n",
|
||||||
"We need to configure it to store data containing embedded vectors.\n",
|
"User has complete freedom in the datagrid configuration, but for simple data type everything is automatically\n",
|
||||||
"\n",
|
"configured by the python layer. We take advantage of this feature so we can focus on our application."
|
||||||
"In the next cells we're going to:\n",
|
|
||||||
"- create an empty Infinispan VectoreStore\n",
|
|
||||||
"- deploy a protobuf definition of our data\n",
|
|
||||||
"- create a cache"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "49668bf1-778b-466d-86fb-41747ed52b74",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Creating a langchain_core.VectorStore\n",
|
|
||||||
"from langchain_community.vectorstores import InfinispanVS\n",
|
|
||||||
"\n",
|
|
||||||
"ispnvs = InfinispanVS.from_texts(\n",
|
|
||||||
" texts={}, embedding=hf, cache_name=\"demo_cache\", entity_name=\"demo_entity\"\n",
|
|
||||||
")\n",
|
|
||||||
"ispn = ispnvs.ispn"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "0cedf066-aaab-4185-b049-93eea9b48329",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Protobuf definition\n",
|
|
||||||
"\n",
|
|
||||||
"Below there's the protobuf definition of our data type that contains:\n",
|
|
||||||
"- embedded vector (field 1)\n",
|
|
||||||
"- text of the news (2)\n",
|
|
||||||
"- title of the news (3)\n",
|
|
||||||
"\n",
|
|
||||||
"As you can see, there are additional annotations in the comments that tell Infinispan that:\n",
|
|
||||||
"- data type must be indexed (`@Indexed`)\n",
|
|
||||||
"- field 1 is an embeddeded vector (`@Vector`)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "1fa0add0-8317-4667-9b8c-5d91c47f752a",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import json\n",
|
|
||||||
"\n",
|
|
||||||
"# Infinispan supports protobuf schemas\n",
|
|
||||||
"schema_vector = \"\"\"\n",
|
|
||||||
"/**\n",
|
|
||||||
" * @Indexed\n",
|
|
||||||
" */\n",
|
|
||||||
"message demo_entity {\n",
|
|
||||||
"/**\n",
|
|
||||||
" * @Vector(dimension=384)\n",
|
|
||||||
" */\n",
|
|
||||||
"repeated float vector = 1;\n",
|
|
||||||
"optional string text = 2;\n",
|
|
||||||
"optional string title = 3;\n",
|
|
||||||
"}\n",
|
|
||||||
"\"\"\"\n",
|
|
||||||
"# Cleanup before deploy a new schema\n",
|
|
||||||
"ispnvs.schema_delete()\n",
|
|
||||||
"output = ispnvs.schema_create(schema_vector)\n",
|
|
||||||
"assert output.status_code == 200\n",
|
|
||||||
"assert json.loads(output.text)[\"error\"] is None\n",
|
|
||||||
"# Create the cache\n",
|
|
||||||
"ispnvs.cache_create()\n",
|
|
||||||
"# Cleanup old data and index\n",
|
|
||||||
"ispnvs.cache_clear()\n",
|
|
||||||
"ispnvs.cache_index_reindex()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -216,8 +145,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Prepare the data\n",
|
"## Prepare the data\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In this demo we choose to store text,vector and metadata in the same cache, but other options\n",
|
"In this demo we rely on the default configuration, thus texts, metadatas and vectors in the same cache, but other options are possible: i.e. content can be store somewhere else and vector store could contain only a reference to the actual content."
|
||||||
"are possible: i.e. content can be store somewhere else and vector store could contain only a reference to the actual content."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -239,15 +167,12 @@
|
|||||||
" metas = []\n",
|
" metas = []\n",
|
||||||
" embeds = []\n",
|
" embeds = []\n",
|
||||||
" for row in spamreader:\n",
|
" for row in spamreader:\n",
|
||||||
" # first and fifth value are joined to form the content\n",
|
" # first and fifth values are joined to form the content\n",
|
||||||
" # to be processed\n",
|
" # to be processed\n",
|
||||||
" text = row[0] + \".\" + row[4]\n",
|
" text = row[0] + \".\" + row[4]\n",
|
||||||
" texts.append(text)\n",
|
" texts.append(text)\n",
|
||||||
" # Storing meta\n",
|
|
||||||
" # Store text and title as metadata\n",
|
" # Store text and title as metadata\n",
|
||||||
" meta = {}\n",
|
" meta = {\"text\": row[4], \"title\": row[0]}\n",
|
||||||
" meta[\"text\"] = row[4]\n",
|
|
||||||
" meta[\"title\"] = row[0]\n",
|
|
||||||
" metas.append(meta)\n",
|
" metas.append(meta)\n",
|
||||||
" i = i + 1\n",
|
" i = i + 1\n",
|
||||||
" # Change this to change the number of news you want to load\n",
|
" # Change this to change the number of news you want to load\n",
|
||||||
@ -271,7 +196,10 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# add texts and fill vector db\n",
|
"# add texts and fill vector db\n",
|
||||||
"keys = ispnvs.add_texts(texts, metas)"
|
"\n",
|
||||||
|
"from langchain_community.vectorstores import InfinispanVS\n",
|
||||||
|
"\n",
|
||||||
|
"ispnvs = InfinispanVS.from_texts(texts, hf, metas)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -361,18 +289,6 @@
|
|||||||
"print_docs(ispnvs.similarity_search(\"How to stay young\", 5))"
|
"print_docs(ispnvs.similarity_search(\"How to stay young\", 5))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "862e4af2-9f8a-4985-90cb-997477901b1e",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Clean up\n",
|
|
||||||
"ispnvs.schema_delete()\n",
|
|
||||||
"ispnvs.cache_delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -400,7 +316,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.18"
|
"version": "3.9.18"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
@ -5,14 +5,7 @@ from __future__ import annotations
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
from typing import (
|
from typing import Any, Iterable, List, Optional, Tuple, Type, cast
|
||||||
Any,
|
|
||||||
Iterable,
|
|
||||||
List,
|
|
||||||
Optional,
|
|
||||||
Tuple,
|
|
||||||
Type,
|
|
||||||
)
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
@ -25,29 +18,44 @@ logger = logging.getLogger(__name__)
|
|||||||
class InfinispanVS(VectorStore):
|
class InfinispanVS(VectorStore):
|
||||||
"""`Infinispan` VectorStore interface.
|
"""`Infinispan` VectorStore interface.
|
||||||
|
|
||||||
This class exposes the method to present Infinispan as a
|
This class exposes the method to present Infinispan as a
|
||||||
VectorStore. It relies on the Infinispan class (below) which takes care
|
VectorStore. It relies on the Infinispan class (below) which takes care
|
||||||
of the REST interface with the server.
|
of the REST interface with the server.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
from langchain_community.vectorstores import InfinispanVS
|
from langchain_community.vectorstores import InfinispanVS
|
||||||
from mymodels import RGBEmbeddings
|
from mymodels import RGBEmbeddings
|
||||||
|
...
|
||||||
vectorDb = InfinispanVS.from_documents(docs,
|
vectorDb = InfinispanVS.from_documents(docs,
|
||||||
embedding=RGBEmbeddings(),
|
embedding=RGBEmbeddings(),
|
||||||
output_fields=["texture", "color"],
|
output_fields=["texture", "color"],
|
||||||
lambda_key=lambda text,meta: str(meta["_key"]),
|
lambda_key=lambda text,meta: str(meta["_key"]),
|
||||||
lambda_content=lambda item: item["color"])
|
lambda_content=lambda item: item["color"])
|
||||||
|
|
||||||
|
or an empty InfinispanVS instance can be created if preliminary setup
|
||||||
|
is required before populating the store
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
from langchain_community.vectorstores import InfinispanVS
|
||||||
|
from mymodels import RGBEmbeddings
|
||||||
|
...
|
||||||
|
ispnVS = InfinispanVS()
|
||||||
|
# configure Infinispan here
|
||||||
|
# i.e. create cache and schema
|
||||||
|
|
||||||
|
# then populate the store
|
||||||
|
vectorDb = InfinispanVS.from_documents(docs,
|
||||||
|
embedding=RGBEmbeddings(),
|
||||||
|
output_fields=["texture", "color"],
|
||||||
|
lambda_key=lambda text,meta: str(meta["_key"]),
|
||||||
|
lambda_content=lambda item: item["color"])
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
embedding: Optional[Embeddings] = None,
|
embedding: Optional[Embeddings] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
clear_old: Optional[bool] = True,
|
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
):
|
):
|
||||||
self.ispn = Infinispan(**kwargs)
|
self.ispn = Infinispan(**kwargs)
|
||||||
@ -65,8 +73,6 @@ class InfinispanVS(VectorStore):
|
|||||||
)
|
)
|
||||||
self._output_fields = self._configuration.get("output_fields")
|
self._output_fields = self._configuration.get("output_fields")
|
||||||
self._ids = ids
|
self._ids = ids
|
||||||
if clear_old:
|
|
||||||
self.ispn.cache_clear(self._cache_name)
|
|
||||||
|
|
||||||
def _default_metadata(self, item: dict) -> dict:
|
def _default_metadata(self, item: dict) -> dict:
|
||||||
meta = dict(item)
|
meta = dict(item)
|
||||||
@ -78,6 +84,43 @@ class InfinispanVS(VectorStore):
|
|||||||
def _default_content(self, item: dict[str, Any]) -> Any:
|
def _default_content(self, item: dict[str, Any]) -> Any:
|
||||||
return item.get(self._textfield)
|
return item.get(self._textfield)
|
||||||
|
|
||||||
|
def schema_builder(self, templ: dict, dimension: int) -> str:
    """Build the protobuf schema for the entity stored in the vector cache.

    The message is named after ``self._entity_name``. Field 1 is the
    embedding, a ``repeated float`` named ``self._vectorfield`` annotated
    with ``@Vector(dimension=...)``. Every key of ``templ`` becomes an
    ``optional`` scalar field, numbered from 2 in iteration order.

    Args:
        templ: sample metadata dict; the Python type of each value selects
            the protobuf type of the field with the same name.
        dimension: size of the embedding vector.

    Returns:
        The protobuf schema as a string.

    Raises:
        Exception: if a value is not a bool, str, int, float or bytes.
    """
    # Order matters: bool is a subclass of int, so it has to be tested
    # first, otherwise boolean metadata is declared as int64.
    proto_types = (
        (bool, "bool"),
        (str, "string"),
        (int, "int64"),
        (float, "double"),
        (bytes, "bytes"),
    )
    metadata_proto_tpl = """
/**
 * @Indexed
 */
message %s {
/**
 * @Vector(dimension=%d)
 */
repeated float %s = 1;
"""
    header = metadata_proto_tpl % (
        self._entity_name,
        dimension,
        self._vectorfield,
    )
    parts = [header]
    # Field number 1 is taken by the vector, metadata starts at 2.
    for idx, (f, v) in enumerate(templ.items(), start=2):
        for py_type, proto_type in proto_types:
            if isinstance(v, py_type):
                parts.append(
                    "optional " + proto_type + " " + f + " = " + str(idx) + ";\n"
                )
                break
        else:
            raise Exception(
                "Unable to build proto schema for metadata. "
                "Unhandled type for field: " + f
            )
    parts.append("}\n")
    return "".join(parts)
|
||||||
|
|
||||||
def schema_create(self, proto: str) -> requests.Response:
|
def schema_create(self, proto: str) -> requests.Response:
|
||||||
"""Deploy the schema for the vector db
|
"""Deploy the schema for the vector db
|
||||||
Args:
|
Args:
|
||||||
@ -143,6 +186,13 @@ class InfinispanVS(VectorStore):
|
|||||||
"""
|
"""
|
||||||
return self.ispn.cache_clear(self._cache_name)
|
return self.ispn.cache_clear(self._cache_name)
|
||||||
|
|
||||||
|
def cache_exists(self) -> bool:
    """Tell whether the cache backing this vector store exists.

    Returns:
        Whatever the underlying ``ispn.cache_exists`` reports for
        ``self._cache_name``.
    """
    backend = self.ispn
    return backend.cache_exists(self._cache_name)
|
||||||
|
|
||||||
def cache_index_clear(self) -> requests.Response:
|
def cache_index_clear(self) -> requests.Response:
|
||||||
"""Clear the index for the vector db
|
"""Clear the index for the vector db
|
||||||
Returns:
|
Returns:
|
||||||
@ -161,10 +211,16 @@ class InfinispanVS(VectorStore):
|
|||||||
self,
|
self,
|
||||||
texts: Iterable[str],
|
texts: Iterable[str],
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
|
last_vector: Optional[List[float]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
result = []
|
result = []
|
||||||
embeds = self._embedding.embed_documents(list(texts)) # type: ignore
|
texts_l = list(texts)
|
||||||
|
if last_vector:
|
||||||
|
texts_l.pop()
|
||||||
|
embeds = self._embedding.embed_documents(texts_l) # type: ignore
|
||||||
|
if last_vector:
|
||||||
|
embeds.append(last_vector)
|
||||||
if not metadatas:
|
if not metadatas:
|
||||||
metadatas = [{} for _ in texts]
|
metadatas = [{} for _ in texts]
|
||||||
ids = self._ids or [str(uuid.uuid4()) for _ in texts]
|
ids = self._ids or [str(uuid.uuid4()) for _ in texts]
|
||||||
@ -266,6 +322,23 @@ class InfinispanVS(VectorStore):
|
|||||||
documents.append((doc, hit["score()"]))
|
documents.append((doc, hit["score()"]))
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
|
def configure(self, metadata: dict, dimension: int) -> None:
    """Deploy the schema and the cache needed by this vector store.

    Builds a protobuf schema from ``metadata`` and ``dimension``, deploys
    it, creates the cache when ``cache_exists()`` reports it missing, and
    finally clears the cache index.

    Args:
        metadata: sample metadata dict passed to ``schema_builder``.
        dimension: size of the embedding vector.

    Raises:
        AssertionError: if the schema or the cache cannot be created, or
            if the schema response carries a non-null ``error`` field.
    """
    schema = self.schema_builder(metadata, dimension)
    output = self.schema_create(schema)
    # Raised explicitly (same exception type as the former asserts) so the
    # checks survive ``python -O`` and the full hint reaches the user.
    if not output.ok:
        raise AssertionError(
            "Unable to create schema. Already exists? "
            "Consider using clear_old=True"
        )
    schema_error = json.loads(output.text)["error"]
    if schema_error is not None:
        raise AssertionError("Schema creation failed: " + str(schema_error))
    if not self.cache_exists():
        output = self.cache_create()
        if not output.ok:
            raise AssertionError(
                "Unable to create cache. Already exists? "
                "Consider using clear_old=True"
            )
    # Ensure index is clean
    self.cache_index_clear()
|
||||||
|
|
||||||
|
def config_clear(self) -> None:
    """Remove the store configuration: the schema first, then the cache."""
    for teardown in (self.schema_delete, self.cache_delete):
        teardown()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_texts(
|
def from_texts(
|
||||||
cls: Type[InfinispanVS],
|
cls: Type[InfinispanVS],
|
||||||
@ -273,13 +346,24 @@ class InfinispanVS(VectorStore):
|
|||||||
embedding: Embeddings,
|
embedding: Embeddings,
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
clear_old: Optional[bool] = None,
|
clear_old: Optional[bool] = True,
|
||||||
|
auto_config: Optional[bool] = True,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> InfinispanVS:
|
) -> InfinispanVS:
|
||||||
"""Return VectorStore initialized from texts and embeddings."""
|
"""Return VectorStore initialized from texts and embeddings."""
|
||||||
infinispanvs = cls(embedding=embedding, ids=ids, clear_old=clear_old, **kwargs)
|
infinispanvs = cls(embedding=embedding, ids=ids, **kwargs)
|
||||||
|
if auto_config and len(metadatas or []) > 0:
|
||||||
|
if clear_old:
|
||||||
|
infinispanvs.config_clear()
|
||||||
|
vec = embedding.embed_query(texts[len(texts) - 1])
|
||||||
|
metadatas = cast(List[dict], metadatas)
|
||||||
|
infinispanvs.configure(metadatas[0], len(vec))
|
||||||
|
else:
|
||||||
|
if clear_old:
|
||||||
|
infinispanvs.cache_clear()
|
||||||
|
vec = embedding.embed_query(texts[len(texts) - 1])
|
||||||
if texts:
|
if texts:
|
||||||
infinispanvs.add_texts(texts, metadatas)
|
infinispanvs.add_texts(texts, metadatas, vector=vec)
|
||||||
return infinispanvs
|
return infinispanvs
|
||||||
|
|
||||||
|
|
||||||
@ -293,7 +377,8 @@ class Infinispan:
|
|||||||
create and set up a vector db.
|
create and set up a vector db.
|
||||||
|
|
||||||
You need a running Infinispan (15+) server without authentication.
|
You need a running Infinispan (15+) server without authentication.
|
||||||
You can easily start one, see: https://github.com/rigazilla/infinispan-vector#run-infinispan
|
You can easily start one, see:
|
||||||
|
https://github.com/rigazilla/infinispan-vector#run-infinispan
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, **kwargs: Any):
|
def __init__(self, **kwargs: Any):
|
||||||
@ -473,6 +558,29 @@ class Infinispan:
|
|||||||
response = requests.post(api_url, timeout=REST_TIMEOUT)
|
response = requests.post(api_url, timeout=REST_TIMEOUT)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
def cache_exists(self, cache_name: str) -> bool:
    """Check if a cache exists
    Args:
        cache_name(str): name of the cache.
    Returns:
        True if cache exists
    """
    # Probe the cache resource itself. The "?action=clear" query string
    # belongs to the clear operation and has no place in an existence
    # check.
    api_url = self._default_node + self._cache_url + "/" + cache_name
    return self.resource_exists(api_url)
|
||||||
|
|
||||||
|
@staticmethod
def resource_exists(api_url: str) -> bool:
    """Probe a URL with an HTTP HEAD request.

    Args:
        api_url(str): url of the resource.
    Returns:
        the ``ok`` flag of the HEAD response
    """
    return requests.head(api_url, timeout=REST_TIMEOUT).ok
|
||||||
|
|
||||||
def index_clear(self, cache_name: str) -> requests.Response:
|
def index_clear(self, cache_name: str) -> requests.Response:
|
||||||
"""Clear an index on a cache
|
"""Clear an index on a cache
|
||||||
Args:
|
Args:
|
||||||
|
@ -1,17 +1,19 @@
|
|||||||
"""Test Infinispan functionality."""
|
"""Test Infinispan functionality."""
|
||||||
|
|
||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
|
import pytest
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
|
|
||||||
from langchain_community.vectorstores import InfinispanVS
|
from langchain_community.vectorstores.infinispanvs import InfinispanVS
|
||||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||||
FakeEmbeddings,
|
FakeEmbeddings,
|
||||||
fake_texts,
|
fake_texts,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _infinispan_setup() -> None:
|
def _infinispan_setup_noautoconf() -> None:
|
||||||
ispnvs = InfinispanVS()
|
ispnvs = InfinispanVS(auto_config=False)
|
||||||
ispnvs.cache_delete()
|
ispnvs.cache_delete()
|
||||||
ispnvs.schema_delete()
|
ispnvs.schema_delete()
|
||||||
proto = """
|
proto = """
|
||||||
@ -37,6 +39,7 @@ def _infinispanvs_from_texts(
|
|||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
clear_old: Optional[bool] = True,
|
clear_old: Optional[bool] = True,
|
||||||
|
auto_config: Optional[bool] = False,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> InfinispanVS:
|
) -> InfinispanVS:
|
||||||
texts = [{"text": t} for t in fake_texts]
|
texts = [{"text": t} for t in fake_texts]
|
||||||
@ -50,86 +53,109 @@ def _infinispanvs_from_texts(
|
|||||||
metadatas=metadatas,
|
metadatas=metadatas,
|
||||||
ids=ids,
|
ids=ids,
|
||||||
clear_old=clear_old,
|
clear_old=clear_old,
|
||||||
|
auto_config=auto_config,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_infinispan() -> None:
|
@pytest.mark.parametrize("autoconfig", [False, True])
|
||||||
"""Test end to end construction and search."""
|
class TestBasic:
|
||||||
_infinispan_setup()
|
def test_infinispan(self, autoconfig: bool) -> None:
|
||||||
docsearch = _infinispanvs_from_texts()
|
"""Test end to end construction and search."""
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
if not autoconfig:
|
||||||
assert output == [Document(page_content="foo")]
|
_infinispan_setup_noautoconf()
|
||||||
|
docsearch = _infinispanvs_from_texts(auto_config=autoconfig)
|
||||||
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
|
assert output == [Document(page_content="foo")]
|
||||||
|
|
||||||
|
def test_infinispan_with_metadata(self, autoconfig: bool) -> None:
|
||||||
|
"""Test with metadata"""
|
||||||
|
if not autoconfig:
|
||||||
|
_infinispan_setup_noautoconf()
|
||||||
|
meta = []
|
||||||
|
for _ in range(len(fake_texts)):
|
||||||
|
meta.append({"label": "test"})
|
||||||
|
docsearch = _infinispanvs_from_texts(metadatas=meta, auto_config=autoconfig)
|
||||||
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
|
assert output == [Document(page_content="foo", metadata={"label": "test"})]
|
||||||
|
|
||||||
def test_infinispan_with_metadata() -> None:
|
def test_infinispan_with_metadata_with_output_fields(
|
||||||
"""Test with metadata"""
|
self, autoconfig: bool
|
||||||
_infinispan_setup()
|
) -> None:
|
||||||
meta = []
|
"""Test with metadata"""
|
||||||
for _ in range(len(fake_texts)):
|
if not autoconfig:
|
||||||
meta.append({"label": "test"})
|
_infinispan_setup_noautoconf()
|
||||||
docsearch = _infinispanvs_from_texts(metadatas=meta)
|
metadatas = [
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
{"page": i, "label": "label" + str(i)} for i in range(len(fake_texts))
|
||||||
assert output == [Document(page_content="foo", metadata={"label": "test"})]
|
]
|
||||||
|
c = {"output_fields": ["label", "page", "text"]}
|
||||||
|
docsearch = _infinispanvs_from_texts(
|
||||||
|
metadatas=metadatas, configuration=c, auto_config=autoconfig
|
||||||
|
)
|
||||||
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
|
assert output == [
|
||||||
|
Document(page_content="foo", metadata={"label": "label0", "page": 0})
|
||||||
|
]
|
||||||
|
|
||||||
|
def test_infinispanvs_with_id(self, autoconfig: bool) -> None:
|
||||||
|
"""Test with ids"""
|
||||||
|
ids = ["id_" + str(i) for i in range(len(fake_texts))]
|
||||||
|
docsearch = _infinispanvs_from_texts(ids=ids, auto_config=autoconfig)
|
||||||
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
|
assert output == [Document(page_content="foo")]
|
||||||
|
|
||||||
def test_infinispan_with_metadata_with_output_fields() -> None:
|
def test_infinispan_with_score(self, autoconfig: bool) -> None:
|
||||||
"""Test with metadata"""
|
"""Test end to end construction and search with scores and IDs."""
|
||||||
_infinispan_setup()
|
if not autoconfig:
|
||||||
metadatas = [{"page": i, "label": "label" + str(i)} for i in range(len(fake_texts))]
|
_infinispan_setup_noautoconf()
|
||||||
c = {"output_fields": ["label", "page", "text"]}
|
texts = ["foo", "bar", "baz"]
|
||||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas, configuration=c)
|
metadatas = [{"page": i} for i in range(len(texts))]
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
docsearch = _infinispanvs_from_texts(
|
||||||
assert output == [
|
metadatas=metadatas, auto_config=autoconfig
|
||||||
Document(page_content="foo", metadata={"label": "label0", "page": 0})
|
)
|
||||||
]
|
output = docsearch.similarity_search_with_score("foo", k=3)
|
||||||
|
docs = [o[0] for o in output]
|
||||||
|
scores = [o[1] for o in output]
|
||||||
|
assert docs == [
|
||||||
|
Document(page_content="foo", metadata={"page": 0}),
|
||||||
|
Document(page_content="bar", metadata={"page": 1}),
|
||||||
|
Document(page_content="baz", metadata={"page": 2}),
|
||||||
|
]
|
||||||
|
assert scores[0] >= scores[1] >= scores[2]
|
||||||
|
|
||||||
|
def test_infinispan_add_texts(self, autoconfig: bool) -> None:
|
||||||
|
"""Test end to end construction and MRR search."""
|
||||||
|
if not autoconfig:
|
||||||
|
_infinispan_setup_noautoconf()
|
||||||
|
texts = ["foo", "bar", "baz"]
|
||||||
|
metadatas = [{"page": i} for i in range(len(texts))]
|
||||||
|
docsearch = _infinispanvs_from_texts(
|
||||||
|
metadatas=metadatas, auto_config=autoconfig
|
||||||
|
)
|
||||||
|
|
||||||
def test_infinispanvs_with_id() -> None:
|
docsearch.add_texts(texts, metadatas)
|
||||||
"""Test with ids"""
|
|
||||||
ids = ["id_" + str(i) for i in range(len(fake_texts))]
|
|
||||||
docsearch = _infinispanvs_from_texts(ids=ids)
|
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
|
||||||
assert output == [Document(page_content="foo")]
|
|
||||||
|
|
||||||
|
output = docsearch.similarity_search("foo", k=10)
|
||||||
|
assert len(output) == 6
|
||||||
|
|
||||||
def test_infinispan_with_score() -> None:
|
def test_infinispan_no_clear_old(self, autoconfig: bool) -> None:
|
||||||
"""Test end to end construction and search with scores and IDs."""
|
"""Test end to end construction and MRR search."""
|
||||||
_infinispan_setup()
|
if not autoconfig:
|
||||||
texts = ["foo", "bar", "baz"]
|
_infinispan_setup_noautoconf()
|
||||||
metadatas = [{"page": i} for i in range(len(texts))]
|
texts = ["foo", "bar", "baz"]
|
||||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas)
|
metadatas = [{"page": i} for i in range(len(texts))]
|
||||||
output = docsearch.similarity_search_with_score("foo", k=3)
|
docsearch = _infinispanvs_from_texts(
|
||||||
docs = [o[0] for o in output]
|
metadatas=metadatas, auto_config=autoconfig
|
||||||
scores = [o[1] for o in output]
|
)
|
||||||
assert docs == [
|
del docsearch
|
||||||
Document(page_content="foo", metadata={"page": 0}),
|
try:
|
||||||
Document(page_content="bar", metadata={"page": 1}),
|
docsearch = _infinispanvs_from_texts(
|
||||||
Document(page_content="baz", metadata={"page": 2}),
|
metadatas=metadatas, clear_old=False, auto_config=autoconfig
|
||||||
]
|
)
|
||||||
assert scores[0] >= scores[1] >= scores[2]
|
except AssertionError:
|
||||||
|
if autoconfig:
|
||||||
|
return
|
||||||
def test_infinispan_add_texts() -> None:
|
else:
|
||||||
"""Test end to end construction and MRR search."""
|
raise
|
||||||
_infinispan_setup()
|
output = docsearch.similarity_search("foo", k=10)
|
||||||
texts = ["foo", "bar", "baz"]
|
assert len(output) == 6
|
||||||
metadatas = [{"page": i} for i in range(len(texts))]
|
|
||||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas)
|
|
||||||
|
|
||||||
docsearch.add_texts(texts, metadatas)
|
|
||||||
|
|
||||||
output = docsearch.similarity_search("foo", k=10)
|
|
||||||
assert len(output) == 6
|
|
||||||
|
|
||||||
|
|
||||||
def test_infinispan_no_clear_old() -> None:
|
|
||||||
"""Test end to end construction and MRR search."""
|
|
||||||
_infinispan_setup()
|
|
||||||
texts = ["foo", "bar", "baz"]
|
|
||||||
metadatas = [{"page": i} for i in range(len(texts))]
|
|
||||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas)
|
|
||||||
del docsearch
|
|
||||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas, clear_old=False)
|
|
||||||
output = docsearch.similarity_search("foo", k=10)
|
|
||||||
assert len(output) == 6
|
|
||||||
|
Loading…
Reference in New Issue
Block a user