mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 12:48:12 +00:00
community: VectorStore Infinispan, adding autoconfiguration (#18967)
**Description**: This PR enables VectorStore autoconfiguration for Infinispan: if the metadata values are all of basic types, the protobuf configuration is generated automatically for the user.
This commit is contained in:
parent
6f544a6a25
commit
9b2f9ee952
@ -37,9 +37,21 @@
|
||||
"\n",
|
||||
"To run this demo we need a running Infinispan instance without authentication and a data file.\n",
|
||||
"In the next three cells we're going to:\n",
|
||||
"- download the data file\n",
|
||||
"- create the configuration\n",
|
||||
"- run Infinispan in docker\n",
|
||||
"- download the data file"
|
||||
"- run Infinispan in docker"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9678d5ce-894c-4e28-bf68-20d45507122f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%bash\n",
|
||||
"#get an archive of news\n",
|
||||
"wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -76,18 +88,6 @@
|
||||
"' > infinispan-noauth.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9678d5ce-894c-4e28-bf68-20d45507122f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%bash\n",
|
||||
"#get an archive of news\n",
|
||||
"wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -95,7 +95,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!docker run -d --name infinispanvs-demo -v $(pwd):/user-config -p 11222:11222 infinispan/server:15.0.0.Dev09 -c /user-config/infinispan-noauth.yaml "
|
||||
"!docker rm --force infinispanvs-demo\n",
|
||||
"!docker run -d --name infinispanvs-demo -v $(pwd):/user-config -p 11222:11222 infinispan/server:15.0 -c /user-config/infinispan-noauth.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -133,80 +134,8 @@
|
||||
"## Setup Infinispan cache\n",
|
||||
"\n",
|
||||
"Infinispan is a very flexible key-value store, it can store raw bits as well as complex data type.\n",
|
||||
"We need to configure it to store data containing embedded vectors.\n",
|
||||
"\n",
|
||||
"In the next cells we're going to:\n",
|
||||
"- create an empty Infinispan VectoreStore\n",
|
||||
"- deploy a protobuf definition of our data\n",
|
||||
"- create a cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "49668bf1-778b-466d-86fb-41747ed52b74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Creating a langchain_core.VectorStore\n",
|
||||
"from langchain_community.vectorstores import InfinispanVS\n",
|
||||
"\n",
|
||||
"ispnvs = InfinispanVS.from_texts(\n",
|
||||
" texts={}, embedding=hf, cache_name=\"demo_cache\", entity_name=\"demo_entity\"\n",
|
||||
")\n",
|
||||
"ispn = ispnvs.ispn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0cedf066-aaab-4185-b049-93eea9b48329",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Protobuf definition\n",
|
||||
"\n",
|
||||
"Below there's the protobuf definition of our data type that contains:\n",
|
||||
"- embedded vector (field 1)\n",
|
||||
"- text of the news (2)\n",
|
||||
"- title of the news (3)\n",
|
||||
"\n",
|
||||
"As you can see, there are additional annotations in the comments that tell Infinispan that:\n",
|
||||
"- data type must be indexed (`@Indexed`)\n",
|
||||
"- field 1 is an embeddeded vector (`@Vector`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1fa0add0-8317-4667-9b8c-5d91c47f752a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"# Infinispan supports protobuf schemas\n",
|
||||
"schema_vector = \"\"\"\n",
|
||||
"/**\n",
|
||||
" * @Indexed\n",
|
||||
" */\n",
|
||||
"message demo_entity {\n",
|
||||
"/**\n",
|
||||
" * @Vector(dimension=384)\n",
|
||||
" */\n",
|
||||
"repeated float vector = 1;\n",
|
||||
"optional string text = 2;\n",
|
||||
"optional string title = 3;\n",
|
||||
"}\n",
|
||||
"\"\"\"\n",
|
||||
"# Cleanup before deploy a new schema\n",
|
||||
"ispnvs.schema_delete()\n",
|
||||
"output = ispnvs.schema_create(schema_vector)\n",
|
||||
"assert output.status_code == 200\n",
|
||||
"assert json.loads(output.text)[\"error\"] is None\n",
|
||||
"# Create the cache\n",
|
||||
"ispnvs.cache_create()\n",
|
||||
"# Cleanup old data and index\n",
|
||||
"ispnvs.cache_clear()\n",
|
||||
"ispnvs.cache_index_reindex()"
|
||||
"Users have complete freedom in the data grid configuration, but for simple data types everything is automatically\n",
|
||||
"configured by the python layer. We take advantage of this feature so we can focus on our application."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -216,8 +145,7 @@
|
||||
"source": [
|
||||
"## Prepare the data\n",
|
||||
"\n",
|
||||
"In this demo we choose to store text,vector and metadata in the same cache, but other options\n",
|
||||
"are possible: i.e. content can be store somewhere else and vector store could contain only a reference to the actual content."
|
||||
"In this demo we rely on the default configuration, so texts, metadata and vectors are stored in the same cache, but other options are possible: e.g. content can be stored somewhere else and the vector store could contain only a reference to the actual content."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -239,15 +167,12 @@
|
||||
" metas = []\n",
|
||||
" embeds = []\n",
|
||||
" for row in spamreader:\n",
|
||||
" # first and fifth value are joined to form the content\n",
|
||||
" # first and fifth values are joined to form the content\n",
|
||||
" # to be processed\n",
|
||||
" text = row[0] + \".\" + row[4]\n",
|
||||
" texts.append(text)\n",
|
||||
" # Storing meta\n",
|
||||
" # Store text and title as metadata\n",
|
||||
" meta = {}\n",
|
||||
" meta[\"text\"] = row[4]\n",
|
||||
" meta[\"title\"] = row[0]\n",
|
||||
" meta = {\"text\": row[4], \"title\": row[0]}\n",
|
||||
" metas.append(meta)\n",
|
||||
" i = i + 1\n",
|
||||
" # Change this to change the number of news you want to load\n",
|
||||
@ -271,7 +196,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# add texts and fill vector db\n",
|
||||
"keys = ispnvs.add_texts(texts, metas)"
|
||||
"\n",
|
||||
"from langchain_community.vectorstores import InfinispanVS\n",
|
||||
"\n",
|
||||
"ispnvs = InfinispanVS.from_texts(texts, hf, metas)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -361,18 +289,6 @@
|
||||
"print_docs(ispnvs.similarity_search(\"How to stay young\", 5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "862e4af2-9f8a-4985-90cb-997477901b1e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Clean up\n",
|
||||
"ispnvs.schema_delete()\n",
|
||||
"ispnvs.cache_delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -400,7 +316,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.18"
|
||||
"version": "3.9.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -5,14 +5,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from typing import (
|
||||
Any,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
)
|
||||
from typing import Any, Iterable, List, Optional, Tuple, Type, cast
|
||||
|
||||
import requests
|
||||
from langchain_core.documents import Document
|
||||
@ -25,29 +18,44 @@ logger = logging.getLogger(__name__)
|
||||
class InfinispanVS(VectorStore):
|
||||
"""`Infinispan` VectorStore interface.
|
||||
|
||||
This class exposes the method to present Infinispan as a
|
||||
VectorStore. It relies on the Infinispan class (below) which takes care
|
||||
of the REST interface with the server.
|
||||
This class exposes the method to present Infinispan as a
|
||||
VectorStore. It relies on the Infinispan class (below) which takes care
|
||||
of the REST interface with the server.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
.. code-block:: python
|
||||
from langchain_community.vectorstores import InfinispanVS
|
||||
from mymodels import RGBEmbeddings
|
||||
|
||||
...
|
||||
vectorDb = InfinispanVS.from_documents(docs,
|
||||
embedding=RGBEmbeddings(),
|
||||
output_fields=["texture", "color"],
|
||||
lambda_key=lambda text,meta: str(meta["_key"]),
|
||||
lambda_content=lambda item: item["color"])
|
||||
|
||||
or an empty InfinispanVS instance can be created if preliminary setup
|
||||
is required before populating the store
|
||||
|
||||
.. code-block:: python
|
||||
from langchain_community.vectorstores import InfinispanVS
|
||||
from mymodels import RGBEmbeddings
|
||||
...
|
||||
ispnVS = InfinispanVS()
|
||||
# configure Infinispan here
|
||||
# i.e. create cache and schema
|
||||
|
||||
# then populate the store
|
||||
vectorDb = InfinispanVS.from_documents(docs,
|
||||
embedding=RGBEmbeddings(),
|
||||
output_fields=["texture", "color"],
|
||||
lambda_key=lambda text,meta: str(meta["_key"]),
|
||||
lambda_content=lambda item: item["color"])
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
embedding: Optional[Embeddings] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
clear_old: Optional[bool] = True,
|
||||
**kwargs: Any,
|
||||
):
|
||||
self.ispn = Infinispan(**kwargs)
|
||||
@ -65,8 +73,6 @@ class InfinispanVS(VectorStore):
|
||||
)
|
||||
self._output_fields = self._configuration.get("output_fields")
|
||||
self._ids = ids
|
||||
if clear_old:
|
||||
self.ispn.cache_clear(self._cache_name)
|
||||
|
||||
def _default_metadata(self, item: dict) -> dict:
|
||||
meta = dict(item)
|
||||
@ -78,6 +84,43 @@ class InfinispanVS(VectorStore):
|
||||
def _default_content(self, item: dict[str, Any]) -> Any:
    """Extract the document content from a stored item.

    Looks up the configured text field (``self._textfield``) in ``item``.

    Args:
        item: mapping holding the fields of one stored entry.
    Returns:
        The value stored under the text field, or None if the key is absent.
    """
    text_key = self._textfield
    return item.get(text_key)
|
||||
|
||||
def schema_builder(self, templ: dict, dimension: int) -> str:
    """Build a protobuf schema for the entity from a sample metadata dict.

    The generated message is named after ``self._entity_name``, is marked
    ``@Indexed`` and always carries the vector field
    (``self._vectorfield``) as field number 1. Each key of ``templ`` then
    becomes an optional field, numbered from 2 in iteration order, whose
    protobuf type is derived from the Python type of the sample value.

    Args:
        templ: sample metadata; keys become field names, value types
            select the protobuf type (bool, str, int, float, bytes).
        dimension: dimension declared in the ``@Vector`` annotation.
    Returns:
        The protobuf schema as a string.
    Raises:
        Exception: if a value has a type that cannot be mapped.
    """
    metadata_proto_tpl = """
/**
 * @Indexed
 */
message %s {
/**
 * @Vector(dimension=%d)
 */
repeated float %s = 1;
"""
    metadata_proto = metadata_proto_tpl % (
        self._entity_name,
        dimension,
        self._vectorfield,
    )
    # Order matters: bool is a subclass of int, so it must be tested
    # before int or booleans would be declared as int64.
    proto_types = (
        (bool, "bool"),
        (str, "string"),
        (int, "int64"),
        (float, "double"),
        (bytes, "bytes"),
    )
    for idx, (f, v) in enumerate(templ.items(), start=2):
        for py_type, proto_type in proto_types:
            if isinstance(v, py_type):
                break
        else:
            raise Exception(
                "Unable to build proto schema for metadata. "
                "Unhandled type for field: " + f
            )
        metadata_proto += (
            "optional " + proto_type + " " + f + " = " + str(idx) + ";\n"
        )
    metadata_proto += "}\n"
    return metadata_proto
|
||||
|
||||
def schema_create(self, proto: str) -> requests.Response:
|
||||
"""Deploy the schema for the vector db
|
||||
Args:
|
||||
@ -143,6 +186,13 @@ class InfinispanVS(VectorStore):
|
||||
"""
|
||||
return self.ispn.cache_clear(self._cache_name)
|
||||
|
||||
def cache_exists(self) -> bool:
    """Report whether the cache used by this vector store exists.

    Delegates to the underlying client (``self.ispn``) with the
    configured cache name.

    Returns:
        True if the cache exists
    """
    cache_name = self._cache_name
    return self.ispn.cache_exists(cache_name)
|
||||
|
||||
def cache_index_clear(self) -> requests.Response:
|
||||
"""Clear the index for the vector db
|
||||
Returns:
|
||||
@ -161,10 +211,16 @@ class InfinispanVS(VectorStore):
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
last_vector: Optional[List[float]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[str]:
|
||||
result = []
|
||||
embeds = self._embedding.embed_documents(list(texts)) # type: ignore
|
||||
texts_l = list(texts)
|
||||
if last_vector:
|
||||
texts_l.pop()
|
||||
embeds = self._embedding.embed_documents(texts_l) # type: ignore
|
||||
if last_vector:
|
||||
embeds.append(last_vector)
|
||||
if not metadatas:
|
||||
metadatas = [{} for _ in texts]
|
||||
ids = self._ids or [str(uuid.uuid4()) for _ in texts]
|
||||
@ -266,6 +322,23 @@ class InfinispanVS(VectorStore):
|
||||
documents.append((doc, hit["score()"]))
|
||||
return documents
|
||||
|
||||
def configure(self, metadata: dict, dimension: int) -> None:
    """Deploy the schema and create the cache for this vector store.

    Builds a protobuf schema from a sample metadata dict, registers it,
    and creates the cache when it does not exist yet.

    Args:
        metadata: sample metadata used to derive the schema fields.
        dimension: dimension of the embedding vectors.
    Raises:
        AssertionError: if the schema or the cache cannot be created, or
            if the schema deployment response reports an error.
    """
    schema = self.schema_builder(metadata, dimension)
    output = self.schema_create(schema)
    # The message is parenthesized so both halves form ONE string;
    # written as two statements the hint about clear_old was dropped.
    # AssertionError is kept on purpose: callers may catch it.
    assert output.ok, (
        "Unable to create schema. Already exists? "
        "Consider using clear_old=True"
    )
    assert json.loads(output.text)["error"] is None
    if not self.cache_exists():
        output = self.cache_create()
        assert output.ok, (
            "Unable to create cache. Already exists? "
            "Consider using clear_old=True"
        )
        # Ensure index is clean
        # NOTE(review): the reviewed text carried no indentation; the
        # index clear is assumed to belong to the cache-creation branch
        # -- confirm against the real file.
        self.cache_index_clear()
|
||||
|
||||
def config_clear(self) -> None:
    """Remove the store configuration: first the schema, then the cache."""
    for teardown in (self.schema_delete, self.cache_delete):
        teardown()
|
||||
|
||||
@classmethod
|
||||
def from_texts(
|
||||
cls: Type[InfinispanVS],
|
||||
@ -273,13 +346,24 @@ class InfinispanVS(VectorStore):
|
||||
embedding: Embeddings,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
clear_old: Optional[bool] = None,
|
||||
clear_old: Optional[bool] = True,
|
||||
auto_config: Optional[bool] = True,
|
||||
**kwargs: Any,
|
||||
) -> InfinispanVS:
|
||||
"""Return VectorStore initialized from texts and embeddings."""
|
||||
infinispanvs = cls(embedding=embedding, ids=ids, clear_old=clear_old, **kwargs)
|
||||
infinispanvs = cls(embedding=embedding, ids=ids, **kwargs)
|
||||
if auto_config and len(metadatas or []) > 0:
|
||||
if clear_old:
|
||||
infinispanvs.config_clear()
|
||||
vec = embedding.embed_query(texts[len(texts) - 1])
|
||||
metadatas = cast(List[dict], metadatas)
|
||||
infinispanvs.configure(metadatas[0], len(vec))
|
||||
else:
|
||||
if clear_old:
|
||||
infinispanvs.cache_clear()
|
||||
vec = embedding.embed_query(texts[len(texts) - 1])
|
||||
if texts:
|
||||
infinispanvs.add_texts(texts, metadatas)
|
||||
infinispanvs.add_texts(texts, metadatas, vector=vec)
|
||||
return infinispanvs
|
||||
|
||||
|
||||
@ -293,7 +377,8 @@ class Infinispan:
|
||||
create and set up a vector db.
|
||||
|
||||
You need a running Infinispan (15+) server without authentication.
|
||||
You can easily start one, see: https://github.com/rigazilla/infinispan-vector#run-infinispan
|
||||
You can easily start one, see:
|
||||
https://github.com/rigazilla/infinispan-vector#run-infinispan
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs: Any):
|
||||
@ -473,6 +558,29 @@ class Infinispan:
|
||||
response = requests.post(api_url, timeout=REST_TIMEOUT)
|
||||
return response
|
||||
|
||||
def cache_exists(self, cache_name: str) -> bool:
    """Check if a cache exists

    Args:
        cache_name(str): name of the cache.
    Returns:
        True if cache exists
    """
    # Probe the plain cache resource. The "?action=clear" query that was
    # appended here belongs to the clear operation, not to an existence
    # check, so it is no longer sent.
    api_url = self._default_node + self._cache_url + "/" + cache_name
    return self.resource_exists(api_url)
|
||||
|
||||
@staticmethod
def resource_exists(api_url: str) -> bool:
    """Probe a REST resource with a HEAD request.

    Args:
        api_url(str): url of the resource.
    Returns:
        True if the response reports success (``response.ok``)
    """
    return requests.head(api_url, timeout=REST_TIMEOUT).ok
|
||||
|
||||
def index_clear(self, cache_name: str) -> requests.Response:
|
||||
"""Clear an index on a cache
|
||||
Args:
|
||||
|
@ -1,17 +1,19 @@
|
||||
"""Test Infinispan functionality."""
|
||||
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.vectorstores import InfinispanVS
|
||||
from langchain_community.vectorstores.infinispanvs import InfinispanVS
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
FakeEmbeddings,
|
||||
fake_texts,
|
||||
)
|
||||
|
||||
|
||||
def _infinispan_setup() -> None:
|
||||
ispnvs = InfinispanVS()
|
||||
def _infinispan_setup_noautoconf() -> None:
|
||||
ispnvs = InfinispanVS(auto_config=False)
|
||||
ispnvs.cache_delete()
|
||||
ispnvs.schema_delete()
|
||||
proto = """
|
||||
@ -37,6 +39,7 @@ def _infinispanvs_from_texts(
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
clear_old: Optional[bool] = True,
|
||||
auto_config: Optional[bool] = False,
|
||||
**kwargs: Any,
|
||||
) -> InfinispanVS:
|
||||
texts = [{"text": t} for t in fake_texts]
|
||||
@ -50,86 +53,109 @@ def _infinispanvs_from_texts(
|
||||
metadatas=metadatas,
|
||||
ids=ids,
|
||||
clear_old=clear_old,
|
||||
auto_config=auto_config,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def test_infinispan() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
_infinispan_setup()
|
||||
docsearch = _infinispanvs_from_texts()
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
@pytest.mark.parametrize("autoconfig", [False, True])
|
||||
class TestBasic:
|
||||
def test_infinispan(self, autoconfig: bool) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
docsearch = _infinispanvs_from_texts(auto_config=autoconfig)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
def test_infinispan_with_metadata(self, autoconfig: bool) -> None:
|
||||
"""Test with metadata"""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
meta = []
|
||||
for _ in range(len(fake_texts)):
|
||||
meta.append({"label": "test"})
|
||||
docsearch = _infinispanvs_from_texts(metadatas=meta, auto_config=autoconfig)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"label": "test"})]
|
||||
|
||||
def test_infinispan_with_metadata() -> None:
|
||||
"""Test with metadata"""
|
||||
_infinispan_setup()
|
||||
meta = []
|
||||
for _ in range(len(fake_texts)):
|
||||
meta.append({"label": "test"})
|
||||
docsearch = _infinispanvs_from_texts(metadatas=meta)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"label": "test"})]
|
||||
def test_infinispan_with_metadata_with_output_fields(
|
||||
self, autoconfig: bool
|
||||
) -> None:
|
||||
"""Test with metadata"""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
metadatas = [
|
||||
{"page": i, "label": "label" + str(i)} for i in range(len(fake_texts))
|
||||
]
|
||||
c = {"output_fields": ["label", "page", "text"]}
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, configuration=c, auto_config=autoconfig
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [
|
||||
Document(page_content="foo", metadata={"label": "label0", "page": 0})
|
||||
]
|
||||
|
||||
def test_infinispanvs_with_id(self, autoconfig: bool) -> None:
|
||||
"""Test with ids"""
|
||||
ids = ["id_" + str(i) for i in range(len(fake_texts))]
|
||||
docsearch = _infinispanvs_from_texts(ids=ids, auto_config=autoconfig)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
def test_infinispan_with_metadata_with_output_fields() -> None:
|
||||
"""Test with metadata"""
|
||||
_infinispan_setup()
|
||||
metadatas = [{"page": i, "label": "label" + str(i)} for i in range(len(fake_texts))]
|
||||
c = {"output_fields": ["label", "page", "text"]}
|
||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas, configuration=c)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [
|
||||
Document(page_content="foo", metadata={"label": "label0", "page": 0})
|
||||
]
|
||||
def test_infinispan_with_score(self, autoconfig: bool) -> None:
|
||||
"""Test end to end construction and search with scores and IDs."""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, auto_config=autoconfig
|
||||
)
|
||||
output = docsearch.similarity_search_with_score("foo", k=3)
|
||||
docs = [o[0] for o in output]
|
||||
scores = [o[1] for o in output]
|
||||
assert docs == [
|
||||
Document(page_content="foo", metadata={"page": 0}),
|
||||
Document(page_content="bar", metadata={"page": 1}),
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
]
|
||||
assert scores[0] >= scores[1] >= scores[2]
|
||||
|
||||
def test_infinispan_add_texts(self, autoconfig: bool) -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, auto_config=autoconfig
|
||||
)
|
||||
|
||||
def test_infinispanvs_with_id() -> None:
|
||||
"""Test with ids"""
|
||||
ids = ["id_" + str(i) for i in range(len(fake_texts))]
|
||||
docsearch = _infinispanvs_from_texts(ids=ids)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
docsearch.add_texts(texts, metadatas)
|
||||
|
||||
output = docsearch.similarity_search("foo", k=10)
|
||||
assert len(output) == 6
|
||||
|
||||
def test_infinispan_with_score() -> None:
|
||||
"""Test end to end construction and search with scores and IDs."""
|
||||
_infinispan_setup()
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas)
|
||||
output = docsearch.similarity_search_with_score("foo", k=3)
|
||||
docs = [o[0] for o in output]
|
||||
scores = [o[1] for o in output]
|
||||
assert docs == [
|
||||
Document(page_content="foo", metadata={"page": 0}),
|
||||
Document(page_content="bar", metadata={"page": 1}),
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
]
|
||||
assert scores[0] >= scores[1] >= scores[2]
|
||||
|
||||
|
||||
def test_infinispan_add_texts() -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
_infinispan_setup()
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas)
|
||||
|
||||
docsearch.add_texts(texts, metadatas)
|
||||
|
||||
output = docsearch.similarity_search("foo", k=10)
|
||||
assert len(output) == 6
|
||||
|
||||
|
||||
def test_infinispan_no_clear_old() -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
_infinispan_setup()
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas)
|
||||
del docsearch
|
||||
docsearch = _infinispanvs_from_texts(metadatas=metadatas, clear_old=False)
|
||||
output = docsearch.similarity_search("foo", k=10)
|
||||
assert len(output) == 6
|
||||
def test_infinispan_no_clear_old(self, autoconfig: bool) -> None:
|
||||
"""Test end to end construction and MRR search."""
|
||||
if not autoconfig:
|
||||
_infinispan_setup_noautoconf()
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, auto_config=autoconfig
|
||||
)
|
||||
del docsearch
|
||||
try:
|
||||
docsearch = _infinispanvs_from_texts(
|
||||
metadatas=metadatas, clear_old=False, auto_config=autoconfig
|
||||
)
|
||||
except AssertionError:
|
||||
if autoconfig:
|
||||
return
|
||||
else:
|
||||
raise
|
||||
output = docsearch.similarity_search("foo", k=10)
|
||||
assert len(output) == 6
|
||||
|
Loading…
Reference in New Issue
Block a user