community[minor]: VectorStore Infinispan. Adding TLS and authentication (#23522)

**Description**:
This PR enables TLS and authentication (digest, basic), with HTTP/2 support,
in the VectorStore for Infinispan server.
The implementation is based on httpx.

Added docker-compose facilities for testing
Added documentation

**Dependencies:**
Requires `pip install "httpx[http2]"` if HTTP/2 is needed.

**Twitter handle:**
https://twitter.com/infinispan
This commit is contained in:
Vittorio Rigamonti 2024-10-09 16:51:39 +02:00 committed by GitHub
parent ff925d2ddc
commit 7da2efd9d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 353 additions and 83 deletions

View File

@ -5,9 +5,10 @@ from __future__ import annotations
import json import json
import logging import logging
import uuid import uuid
from typing import Any, Iterable, List, Optional, Tuple, Type, cast import warnings
from typing import Any, Iterable, List, Optional, Tuple, Type, Union, cast
import requests from httpx import Response
from langchain_core.documents import Document from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore from langchain_core.vectorstores import VectorStore
@ -49,7 +50,7 @@ class InfinispanVS(VectorStore):
embedding=RGBEmbeddings(), embedding=RGBEmbeddings(),
output_fields: ["texture", "color"], output_fields: ["texture", "color"],
lambda_key: lambda text,meta: str(meta["_key"]), lambda_key: lambda text,meta: str(meta["_key"]),
lambda_content: lambda item: item["color"]}) lambda_content: lambda item: item["color"])
""" """
def __init__( def __init__(
@ -58,13 +59,48 @@ class InfinispanVS(VectorStore):
ids: Optional[List[str]] = None, ids: Optional[List[str]] = None,
**kwargs: Any, **kwargs: Any,
): ):
"""
Parameters
----------
cache_name: str
Embeddings cache name. Default "vector"
entity_name: str
Protobuf entity name for the embeddings. Default "vector"
text_field: str
Protobuf field name for text. Default "text"
vector_field: str
Protobuf field name for vector. Default "vector"
lambda_content: lambda
Lambda returning the content part of an item. Default returns text_field
lambda_metadata: lambda
Lambda returning the metadata part of an item. Default returns the item's
fields except text_field, vector_field, _type
output_fields: List[str]
List of fields to be returned from item, if None return all fields.
Default None
kwargs: Any
Rest of arguments passed to Infinispan. See docs"""
self.ispn = Infinispan(**kwargs) self.ispn = Infinispan(**kwargs)
self._configuration = kwargs self._configuration = kwargs
self._cache_name = str(self._configuration.get("cache_name", "vector")) self._cache_name = str(self._configuration.get("cache_name", "vector"))
self._entity_name = str(self._configuration.get("entity_name", "vector")) self._entity_name = str(self._configuration.get("entity_name", "vector"))
self._embedding = embedding self._embedding = embedding
self._textfield = self._configuration.get("textfield", "text") self._textfield = self._configuration.get("textfield", "")
self._vectorfield = self._configuration.get("vectorfield", "vector") if self._textfield == "":
self._textfield = self._configuration.get("text_field", "text")
else:
warnings.warn(
"`textfield` is deprecated. Please use `text_field` " "param.",
DeprecationWarning,
)
self._vectorfield = self._configuration.get("vectorfield", "")
if self._vectorfield == "":
self._vectorfield = self._configuration.get("vector_field", "vector")
else:
warnings.warn(
"`vectorfield` is deprecated. Please use `vector_field` " "param.",
DeprecationWarning,
)
self._to_content = self._configuration.get( self._to_content = self._configuration.get(
"lambda_content", lambda item: self._default_content(item) "lambda_content", lambda item: self._default_content(item)
) )
@ -121,7 +157,7 @@ repeated float %s = 1;
metadata_proto += "}\n" metadata_proto += "}\n"
return metadata_proto return metadata_proto
def schema_create(self, proto: str) -> requests.Response: def schema_create(self, proto: str) -> Response:
"""Deploy the schema for the vector db """Deploy the schema for the vector db
Args: Args:
proto(str): protobuf schema proto(str): protobuf schema
@ -130,14 +166,14 @@ repeated float %s = 1;
""" """
return self.ispn.schema_post(self._entity_name + ".proto", proto) return self.ispn.schema_post(self._entity_name + ".proto", proto)
def schema_delete(self) -> requests.Response: def schema_delete(self) -> Response:
"""Delete the schema for the vector db """Delete the schema for the vector db
Returns: Returns:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
return self.ispn.schema_delete(self._entity_name + ".proto") return self.ispn.schema_delete(self._entity_name + ".proto")
def cache_create(self, config: str = "") -> requests.Response: def cache_create(self, config: str = "") -> Response:
"""Create the cache for the vector db """Create the cache for the vector db
Args: Args:
config(str): configuration of the cache. config(str): configuration of the cache.
@ -172,14 +208,14 @@ repeated float %s = 1;
) )
return self.ispn.cache_post(self._cache_name, config) return self.ispn.cache_post(self._cache_name, config)
def cache_delete(self) -> requests.Response: def cache_delete(self) -> Response:
"""Delete the cache for the vector db """Delete the cache for the vector db
Returns: Returns:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
return self.ispn.cache_delete(self._cache_name) return self.ispn.cache_delete(self._cache_name)
def cache_clear(self) -> requests.Response: def cache_clear(self) -> Response:
"""Clear the cache for the vector db """Clear the cache for the vector db
Returns: Returns:
An http Response containing the result of the operation An http Response containing the result of the operation
@ -193,14 +229,14 @@ repeated float %s = 1;
""" """
return self.ispn.cache_exists(self._cache_name) return self.ispn.cache_exists(self._cache_name)
def cache_index_clear(self) -> requests.Response: def cache_index_clear(self) -> Response:
"""Clear the index for the vector db """Clear the index for the vector db
Returns: Returns:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
return self.ispn.index_clear(self._cache_name) return self.ispn.index_clear(self._cache_name)
def cache_index_reindex(self) -> requests.Response: def cache_index_reindex(self) -> Response:
"""Rebuild the index for the vector db """Rebuild the index for the vector db
Returns: Returns:
An http Response containing the result of the operation An http Response containing the result of the operation
@ -325,12 +361,16 @@ repeated float %s = 1;
def configure(self, metadata: dict, dimension: int) -> None: def configure(self, metadata: dict, dimension: int) -> None:
schema = self.schema_builder(metadata, dimension) schema = self.schema_builder(metadata, dimension)
output = self.schema_create(schema) output = self.schema_create(schema)
assert output.ok, "Unable to create schema. Already exists? " assert (
output.status_code == self.ispn.Codes.OK
), "Unable to create schema. Already exists? "
"Consider using clear_old=True" "Consider using clear_old=True"
assert json.loads(output.text)["error"] is None assert json.loads(output.text)["error"] is None
if not self.cache_exists(): if not self.cache_exists():
output = self.cache_create() output = self.cache_create()
assert output.ok, "Unable to create cache. Already exists? " assert (
output.status_code == self.ispn.Codes.OK
), "Unable to create cache. Already exists? "
"Consider using clear_old=True" "Consider using clear_old=True"
# Ensure index is clean # Ensure index is clean
self.cache_index_clear() self.cache_index_clear()
@ -350,7 +390,24 @@ repeated float %s = 1;
auto_config: Optional[bool] = True, auto_config: Optional[bool] = True,
**kwargs: Any, **kwargs: Any,
) -> InfinispanVS: ) -> InfinispanVS:
"""Return VectorStore initialized from texts and embeddings.""" """Return VectorStore initialized from texts and embeddings.
In addition to parameters described by the super method, this
implementation provides other configuration params if different
configuration from default is needed.
Parameters
----------
ids : List[str]
Additional list of keys associated to the embedding. If not
provided UUIDs will be generated
clear_old : bool
Whether old data must be deleted. Default True
auto_config: bool
Whether to do a complete server setup (caches,
protobuf definition...). Default True
kwargs: Any
Rest of arguments passed to InfinispanVS. See docs"""
infinispanvs = cls(embedding=embedding, ids=ids, **kwargs) infinispanvs = cls(embedding=embedding, ids=ids, **kwargs)
if auto_config and len(metadatas or []) > 0: if auto_config and len(metadatas or []) > 0:
if clear_old: if clear_old:
@ -381,20 +438,83 @@ class Infinispan:
https://github.com/rigazilla/infinispan-vector#run-infinispan https://github.com/rigazilla/infinispan-vector#run-infinispan
""" """
def __init__(self, **kwargs: Any): def __init__(
self._configuration = kwargs self,
self._schema = str(self._configuration.get("schema", "http")) schema: str = "http",
self._host = str(self._configuration.get("hosts", ["127.0.0.1:11222"])[0]) user: str = "",
self._default_node = self._schema + "://" + self._host password: str = "",
self._cache_url = str(self._configuration.get("cache_url", "/rest/v2/caches")) hosts: List[str] = ["127.0.0.1:11222"],
self._schema_url = str(self._configuration.get("cache_url", "/rest/v2/schemas")) cache_url: str = "/rest/v2/caches",
self._use_post_for_query = str( schema_url: str = "/rest/v2/schemas",
self._configuration.get("use_post_for_query", True) use_post_for_query: bool = True,
) http2: bool = True,
verify: bool = True,
**kwargs: Any,
):
"""
Parameters
----------
schema: str
Schema for HTTP request: "http" or "https". Default "http"
user, password: str
User and password if auth is required. Default "" (no authentication)
hosts: List[str]
List of server addresses. Default ["127.0.0.1:11222"]
cache_url: str
URL endpoint for cache API. Default "/rest/v2/caches"
schema_url: str
URL endpoint for schema API. Default "/rest/v2/schemas"
use_post_for_query: bool
Whether POST method should be used for query. Default True
http2: bool
Whether HTTP/2 protocol should be used. `pip install "httpx[http2]"` is
needed for HTTP/2. Default True
verify: bool
Whether TLS certificate must be verified. Default True
"""
def req_query( try:
self, query: str, cache_name: str, local: bool = False import httpx
) -> requests.Response: except ImportError:
raise ImportError(
"Could not import httpx python package. "
"Please install it with `pip install httpx`"
'or `pip install "httpx[http2]"` if you need HTTP/2.'
)
self.Codes = httpx.codes
self._configuration = kwargs
self._schema = schema
self._user = user
self._password = password
self._host = hosts[0]
self._default_node = self._schema + "://" + self._host
self._cache_url = cache_url
self._schema_url = schema_url
self._use_post_for_query = use_post_for_query
self._http2 = http2
if self._user and self._password:
if self._schema == "http":
auth: Union[Tuple[str, str], httpx.DigestAuth] = httpx.DigestAuth(
username=self._user, password=self._password
)
else:
auth = (self._user, self._password)
self._h2c = httpx.Client(
http2=self._http2,
http1=not self._http2,
auth=auth,
verify=verify,
)
else:
self._h2c = httpx.Client(
http2=self._http2,
http1=not self._http2,
verify=verify,
)
def req_query(self, query: str, cache_name: str, local: bool = False) -> Response:
"""Request a query """Request a query
Args: Args:
query(str): query requested query(str): query requested
@ -409,7 +529,7 @@ class Infinispan:
def _query_post( def _query_post(
self, query_str: str, cache_name: str, local: bool = False self, query_str: str, cache_name: str, local: bool = False
) -> requests.Response: ) -> Response:
api_url = ( api_url = (
self._default_node self._default_node
+ self._cache_url + self._cache_url
@ -420,9 +540,9 @@ class Infinispan:
) )
data = {"query": query_str} data = {"query": query_str}
data_json = json.dumps(data) data_json = json.dumps(data)
response = requests.post( response = self._h2c.post(
api_url, api_url,
data_json, content=data_json,
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
timeout=REST_TIMEOUT, timeout=REST_TIMEOUT,
) )
@ -430,7 +550,7 @@ class Infinispan:
def _query_get( def _query_get(
self, query_str: str, cache_name: str, local: bool = False self, query_str: str, cache_name: str, local: bool = False
) -> requests.Response: ) -> Response:
api_url = ( api_url = (
self._default_node self._default_node
+ self._cache_url + self._cache_url
@ -441,10 +561,10 @@ class Infinispan:
+ "&local=" + "&local="
+ str(local) + str(local)
) )
response = requests.get(api_url, timeout=REST_TIMEOUT) response = self._h2c.get(api_url, timeout=REST_TIMEOUT)
return response return response
def post(self, key: str, data: str, cache_name: str) -> requests.Response: def post(self, key: str, data: str, cache_name: str) -> Response:
"""Post an entry """Post an entry
Args: Args:
key(str): key of the entry key(str): key of the entry
@ -454,15 +574,15 @@ class Infinispan:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
response = requests.post( response = self._h2c.post(
api_url, api_url,
data, content=data,
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
timeout=REST_TIMEOUT, timeout=REST_TIMEOUT,
) )
return response return response
def put(self, key: str, data: str, cache_name: str) -> requests.Response: def put(self, key: str, data: str, cache_name: str) -> Response:
"""Put an entry """Put an entry
Args: Args:
key(str): key of the entry key(str): key of the entry
@ -472,15 +592,15 @@ class Infinispan:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
response = requests.put( response = self._h2c.put(
api_url, api_url,
data, content=data,
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
timeout=REST_TIMEOUT, timeout=REST_TIMEOUT,
) )
return response return response
def get(self, key: str, cache_name: str) -> requests.Response: def get(self, key: str, cache_name: str) -> Response:
"""Get an entry """Get an entry
Args: Args:
key(str): key of the entry key(str): key of the entry
@ -489,12 +609,12 @@ class Infinispan:
An http Response containing the entry or errors An http Response containing the entry or errors
""" """
api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
response = requests.get( response = self._h2c.get(
api_url, headers={"Content-Type": "application/json"}, timeout=REST_TIMEOUT api_url, headers={"Content-Type": "application/json"}, timeout=REST_TIMEOUT
) )
return response return response
def schema_post(self, name: str, proto: str) -> requests.Response: def schema_post(self, name: str, proto: str) -> Response:
"""Deploy a schema """Deploy a schema
Args: Args:
name(str): name of the schema. Will be used as a key name(str): name of the schema. Will be used as a key
@ -503,10 +623,10 @@ class Infinispan:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
api_url = self._default_node + self._schema_url + "/" + name api_url = self._default_node + self._schema_url + "/" + name
response = requests.post(api_url, proto, timeout=REST_TIMEOUT) response = self._h2c.post(api_url, content=proto, timeout=REST_TIMEOUT)
return response return response
def cache_post(self, name: str, config: str) -> requests.Response: def cache_post(self, name: str, config: str) -> Response:
"""Create a cache """Create a cache
Args: Args:
name(str): name of the cache. name(str): name of the cache.
@ -515,15 +635,15 @@ class Infinispan:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
api_url = self._default_node + self._cache_url + "/" + name api_url = self._default_node + self._cache_url + "/" + name
response = requests.post( response = self._h2c.post(
api_url, api_url,
config, content=config,
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
timeout=REST_TIMEOUT, timeout=REST_TIMEOUT,
) )
return response return response
def schema_delete(self, name: str) -> requests.Response: def schema_delete(self, name: str) -> Response:
"""Delete a schema """Delete a schema
Args: Args:
name(str): name of the schema. name(str): name of the schema.
@ -531,10 +651,10 @@ class Infinispan:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
api_url = self._default_node + self._schema_url + "/" + name api_url = self._default_node + self._schema_url + "/" + name
response = requests.delete(api_url, timeout=REST_TIMEOUT) response = self._h2c.delete(api_url, timeout=REST_TIMEOUT)
return response return response
def cache_delete(self, name: str) -> requests.Response: def cache_delete(self, name: str) -> Response:
"""Delete a cache """Delete a cache
Args: Args:
name(str): name of the cache. name(str): name of the cache.
@ -542,10 +662,10 @@ class Infinispan:
An http Response containing the result of the operation An http Response containing the result of the operation
""" """
api_url = self._default_node + self._cache_url + "/" + name api_url = self._default_node + self._cache_url + "/" + name
response = requests.delete(api_url, timeout=REST_TIMEOUT) response = self._h2c.delete(api_url, timeout=REST_TIMEOUT)
return response return response
def cache_clear(self, cache_name: str) -> requests.Response: def cache_clear(self, cache_name: str) -> Response:
"""Clear a cache """Clear a cache
Args: Args:
cache_name(str): name of the cache. cache_name(str): name of the cache.
@ -555,7 +675,7 @@ class Infinispan:
api_url = ( api_url = (
self._default_node + self._cache_url + "/" + cache_name + "?action=clear" self._default_node + self._cache_url + "/" + cache_name + "?action=clear"
) )
response = requests.post(api_url, timeout=REST_TIMEOUT) response = self._h2c.post(api_url, timeout=REST_TIMEOUT)
return response return response
def cache_exists(self, cache_name: str) -> bool: def cache_exists(self, cache_name: str) -> bool:
@ -570,18 +690,17 @@ class Infinispan:
) )
return self.resource_exists(api_url) return self.resource_exists(api_url)
@staticmethod def resource_exists(self, api_url: str) -> bool:
def resource_exists(api_url: str) -> bool:
"""Check if a resource exists """Check if a resource exists
Args: Args:
api_url(str): url of the resource. api_url(str): url of the resource.
Returns: Returns:
true if resource exists true if resource exists
""" """
response = requests.head(api_url, timeout=REST_TIMEOUT) response = self._h2c.head(api_url, timeout=REST_TIMEOUT)
return response.ok return response.status_code == self.Codes.OK
def index_clear(self, cache_name: str) -> requests.Response: def index_clear(self, cache_name: str) -> Response:
"""Clear an index on a cache """Clear an index on a cache
Args: Args:
cache_name(str): name of the cache. cache_name(str): name of the cache.
@ -595,9 +714,9 @@ class Infinispan:
+ cache_name + cache_name
+ "/search/indexes?action=clear" + "/search/indexes?action=clear"
) )
return requests.post(api_url, timeout=REST_TIMEOUT) return self._h2c.post(api_url, timeout=REST_TIMEOUT)
def index_reindex(self, cache_name: str) -> requests.Response: def index_reindex(self, cache_name: str) -> Response:
"""Rebuild index on a cache """Rebuild index on a cache
Args: Args:
cache_name(str): name of the cache. cache_name(str): name of the cache.
@ -611,4 +730,4 @@ class Infinispan:
+ cache_name + cache_name
+ "/search/indexes?action=reindex" + "/search/indexes?action=reindex"
) )
return requests.post(api_url, timeout=REST_TIMEOUT) return self._h2c.post(api_url, timeout=REST_TIMEOUT)

View File

@ -0,0 +1,4 @@
#!/bin/sh
# Start the Infinispan test server used by the vector store integration tests.
# Run from the directory containing this script: the compose file lives in
# the ./infinispan subdirectory.

# Abort if the compose directory is missing, so that `docker compose up`
# is never run from an unrelated working directory.
cd infinispan || exit 1
docker compose up

View File

@ -0,0 +1,2 @@
#Fri May 03 10:19:58 CEST 2024
user=ADMIN,admin

View File

@ -0,0 +1,62 @@
<infinispan
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:infinispan:config:15.0 https://infinispan.org/schemas/infinispan-config-15.0.xsd
urn:infinispan:server:15.0 https://infinispan.org/schemas/infinispan-server-15.0.xsd"
xmlns="urn:infinispan:config:15.0"
xmlns:server="urn:infinispan:server:15.0">
<cache-container name="default" statistics="true">
<transport cluster="${infinispan.cluster.name:cluster}" stack="${infinispan.cluster.stack:tcp}" node-name="${infinispan.node.name:}"/>
</cache-container>
<server xmlns="urn:infinispan:server:15.0">
<interfaces>
<interface name="public">
<inet-address value="${infinispan.bind.address:127.0.0.1}"/>
</interface>
</interfaces>
<socket-bindings default-interface="public" port-offset="${infinispan.socket.binding.port-offset:0}">
<socket-binding name="default" port="${infinispan.bind.port:11222}"/>
<socket-binding name="authenticated" port="11232"/>
<socket-binding name="auth-tls" port="11242"/>
</socket-bindings>
<security>
<credential-stores>
<credential-store name="credentials" path="credentials.pfx">
<clear-text-credential clear-text="secret"/>
</credential-store>
</credential-stores>
<security-realms>
<security-realm name="default">
<properties-realm groups-attribute="Roles">
<user-properties path="/user-config/users.properties"/>
<group-properties path="/user-config/groups.properties"/>
</properties-realm>
</security-realm>
<security-realm name="tls">
<!-- TLS identity for this realm (self-signed certificate generated for localhost) -->
<server-identities>
<ssl>
<keystore path="application.keystore"
password="password" alias="server"
generate-self-signed-certificate-host="localhost"/>
</ssl>
</server-identities>
<properties-realm groups-attribute="Roles">
<user-properties path="/user-config/users.properties"/>
<group-properties path="/user-config/groups.properties"/>
</properties-realm>
</security-realm>
</security-realms>
</security>
<endpoints>
<endpoint socket-binding="default"/>
<endpoint socket-binding="authenticated" security-realm="default"/>
<endpoint socket-binding="auth-tls" security-realm="tls"/>
</endpoints>
</server>
</infinispan>

View File

@ -0,0 +1,4 @@
#$REALM_NAME=default$
#$ALGORITHM=encrypted$
#Fri May 03 10:19:58 CEST 2024
user=scram-sha-1\:BYGcIAws2gznU/kpezoSb1VQNVd+YMX9r+9SAINFoZtPHaHTAQ\=\=;scram-sha-256\:BYGcIAwRiWiD+8f7dyQEs1Wsum/64MOcjGJ2UcmZFQB6DZJqwRDJ4NrvII4NttmxlA\=\=;scram-sha-384\:BYGcIAz+Eud65N8GWK4TMwhSCZpeE5EFSdynywdryQj3ZwBEgv+KF8hRUuGxiq3EyRxsby6w7DHK3CICGZLsPrM\=;scram-sha-512\:BYGcIAwWxVY9DHn42kHydivyU3s9LSPmyfPPJkIFYyt/XsMASFHGoy5rzk4ahX4HjpJgb+NjdCwhGfi33CY0azUIrn439s62Yg5mq9i+ISto;digest-md5\:AgR1c2VyB2RlZmF1bHSYYyzPjRDR7MhrsdFSK03P;digest-sha\:AgR1c2VyB2RlZmF1bHTga5gDNnNYh7/2HqhBVOdUHjBzhw\=\=;digest-sha-256\:AgR1c2VyB2RlZmF1bHTig5qZQIxqtJBTUp3EMh5UIFoS4qOhz9Uk5aOW9ZKCfw\=\=;digest-sha-384\:AgR1c2VyB2RlZmF1bHT01pAN/pRMLS5afm4Q9S0kuLlA0NokuP8F0AISTwXCb1E8RMsFHlBVPOa5rC6Nyso\=;digest-sha-512\:AgR1c2VyB2RlZmF1bHTi+cHn1Ez2Ze41CvPXb9eP/7JmRys7m1f5qPMQWhAmDOuuUXNWEG4yKSI9k2EZgQvMKTd5hDbR24ul1BsYP8X5;

View File

@ -0,0 +1,16 @@
version: "3.7"
services:
infinispan:
image: quay.io/infinispan/server:15.0
ports:
- '11222:11222'
- '11232:11232'
- '11242:11242'
deploy:
resources:
limits:
memory: 25Gb
volumes:
- ./conf:/user-config
command: -c /user-config/infinispan.xml

View File

@ -1,7 +1,9 @@
"""Test Infinispan functionality.""" """Test Infinispan functionality."""
import warnings
from typing import Any, List, Optional from typing import Any, List, Optional
import httpx
import pytest import pytest
from langchain_core.documents import Document from langchain_core.documents import Document
@ -11,9 +13,18 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
fake_texts, fake_texts,
) )
"""
cd tests/integration_tests/vectorstores/docker-compose
./infinispan.sh
def _infinispan_setup_noautoconf() -> None: Current Infinispan implementation relies on httpx: `pip install "httpx[http2]"`
ispnvs = InfinispanVS(auto_config=False) if not installed. HTTP/2 is enabled by default, if it's not
wanted use `pip install "httpx"`.
"""
def _infinispan_setup_noautoconf(**kwargs: Any) -> None:
ispnvs = InfinispanVS(http2=_hasHttp2(), auto_config=False, **kwargs)
ispnvs.cache_delete() ispnvs.cache_delete()
ispnvs.schema_delete() ispnvs.schema_delete()
proto = """ proto = """
@ -54,64 +65,104 @@ def _infinispanvs_from_texts(
ids=ids, ids=ids,
clear_old=clear_old, clear_old=clear_old,
auto_config=auto_config, auto_config=auto_config,
http2=_hasHttp2(),
**kwargs, **kwargs,
) )
def _hasHttp2() -> bool:
    """Report whether httpx can create an HTTP/2-enabled client.

    Returns:
        True if ``httpx.Client(http2=True)`` can be constructed, False if
        construction fails with ImportError (httpx raises it when the
        optional ``h2`` dependency from ``httpx[http2]`` is not installed).
    """
    try:
        # The client is only a capability probe: close it immediately so
        # its connection pool is not leaked on every call.
        httpx.Client(http2=True).close()
    except ImportError:
        return False
    return True
@pytest.mark.parametrize("autoconfig", [False, True]) @pytest.mark.parametrize("autoconfig", [False, True])
@pytest.mark.parametrize(
"conn_opts",
[
{},
{
"user": "user",
"password": "password",
"hosts": ["localhost:11232"],
"schema": "http",
},
{
"user": "user",
"password": "password",
"hosts": ["localhost:11242"],
"schema": "https",
"verify": False,
},
],
)
class TestBasic: class TestBasic:
def test_infinispan(self, autoconfig: bool) -> None: def test_infinispan(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and search.""" """Test end to end construction and search."""
if not autoconfig: if not autoconfig:
_infinispan_setup_noautoconf() _infinispan_setup_noautoconf(**conn_opts)
docsearch = _infinispanvs_from_texts(auto_config=autoconfig) docsearch = _infinispanvs_from_texts(auto_config=autoconfig, **conn_opts)
output = docsearch.similarity_search("foo", k=1) output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")] assert output == [Document(page_content="foo")]
def test_infinispan_with_metadata(self, autoconfig: bool) -> None: def test_infinispan_with_auth(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and search."""
if not autoconfig:
_infinispan_setup_noautoconf(**conn_opts)
docsearch = _infinispanvs_from_texts(auto_config=autoconfig, **conn_opts)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_infinispan_with_metadata(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test with metadata""" """Test with metadata"""
if not autoconfig: if not autoconfig:
_infinispan_setup_noautoconf() _infinispan_setup_noautoconf(**conn_opts)
meta = [] meta = []
for _ in range(len(fake_texts)): for _ in range(len(fake_texts)):
meta.append({"label": "test"}) meta.append({"label": "test"})
docsearch = _infinispanvs_from_texts(metadatas=meta, auto_config=autoconfig) docsearch = _infinispanvs_from_texts(
metadatas=meta, auto_config=autoconfig, **conn_opts
)
output = docsearch.similarity_search("foo", k=1) output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"label": "test"})] assert output == [Document(page_content="foo", metadata={"label": "test"})]
def test_infinispan_with_metadata_with_output_fields( def test_infinispan_with_metadata_with_output_fields(
self, autoconfig: bool self, autoconfig: bool, conn_opts: dict
) -> None: ) -> None:
"""Test with metadata""" """Test with metadata"""
if not autoconfig: if not autoconfig:
_infinispan_setup_noautoconf() _infinispan_setup_noautoconf(**conn_opts)
metadatas = [ metadatas = [
{"page": i, "label": "label" + str(i)} for i in range(len(fake_texts)) {"page": i, "label": "label" + str(i)} for i in range(len(fake_texts))
] ]
c = {"output_fields": ["label", "page", "text"]} c = {"output_fields": ["label", "page", "text"]}
docsearch = _infinispanvs_from_texts( docsearch = _infinispanvs_from_texts(
metadatas=metadatas, configuration=c, auto_config=autoconfig metadatas=metadatas, configuration=c, auto_config=autoconfig, **conn_opts
) )
output = docsearch.similarity_search("foo", k=1) output = docsearch.similarity_search("foo", k=1)
assert output == [ assert output == [
Document(page_content="foo", metadata={"label": "label0", "page": 0}) Document(page_content="foo", metadata={"label": "label0", "page": 0})
] ]
def test_infinispanvs_with_id(self, autoconfig: bool) -> None: def test_infinispanvs_with_id(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test with ids""" """Test with ids"""
ids = ["id_" + str(i) for i in range(len(fake_texts))] ids = ["id_" + str(i) for i in range(len(fake_texts))]
docsearch = _infinispanvs_from_texts(ids=ids, auto_config=autoconfig) docsearch = _infinispanvs_from_texts(
ids=ids, auto_config=autoconfig, **conn_opts
)
output = docsearch.similarity_search("foo", k=1) output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")] assert output == [Document(page_content="foo")]
def test_infinispan_with_score(self, autoconfig: bool) -> None: def test_infinispan_with_score(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and search with scores and IDs.""" """Test end to end construction and search with scores and IDs."""
if not autoconfig: if not autoconfig:
_infinispan_setup_noautoconf() _infinispan_setup_noautoconf(**conn_opts)
texts = ["foo", "bar", "baz"] texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))] metadatas = [{"page": i} for i in range(len(texts))]
docsearch = _infinispanvs_from_texts( docsearch = _infinispanvs_from_texts(
metadatas=metadatas, auto_config=autoconfig metadatas=metadatas, auto_config=autoconfig, **conn_opts
) )
output = docsearch.similarity_search_with_score("foo", k=3) output = docsearch.similarity_search_with_score("foo", k=3)
docs = [o[0] for o in output] docs = [o[0] for o in output]
@ -123,14 +174,14 @@ class TestBasic:
] ]
assert scores[0] >= scores[1] >= scores[2] assert scores[0] >= scores[1] >= scores[2]
def test_infinispan_add_texts(self, autoconfig: bool) -> None: def test_infinispan_add_texts(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and MRR search.""" """Test end to end construction and MRR search."""
if not autoconfig: if not autoconfig:
_infinispan_setup_noautoconf() _infinispan_setup_noautoconf(**conn_opts)
texts = ["foo", "bar", "baz"] texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))] metadatas = [{"page": i} for i in range(len(texts))]
docsearch = _infinispanvs_from_texts( docsearch = _infinispanvs_from_texts(
metadatas=metadatas, auto_config=autoconfig metadatas=metadatas, auto_config=autoconfig, **conn_opts
) )
docsearch.add_texts(texts, metadatas) docsearch.add_texts(texts, metadatas)
@ -138,19 +189,22 @@ class TestBasic:
output = docsearch.similarity_search("foo", k=10) output = docsearch.similarity_search("foo", k=10)
assert len(output) == 6 assert len(output) == 6
def test_infinispan_no_clear_old(self, autoconfig: bool) -> None: def test_infinispan_no_clear_old(self, autoconfig: bool, conn_opts: dict) -> None:
"""Test end to end construction and MRR search.""" """Test end to end construction and MRR search."""
if not autoconfig: if not autoconfig:
_infinispan_setup_noautoconf() _infinispan_setup_noautoconf(**conn_opts)
texts = ["foo", "bar", "baz"] texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))] metadatas = [{"page": i} for i in range(len(texts))]
docsearch = _infinispanvs_from_texts( docsearch = _infinispanvs_from_texts(
metadatas=metadatas, auto_config=autoconfig metadatas=metadatas, auto_config=autoconfig, **conn_opts
) )
del docsearch del docsearch
try: try:
docsearch = _infinispanvs_from_texts( docsearch = _infinispanvs_from_texts(
metadatas=metadatas, clear_old=False, auto_config=autoconfig metadatas=metadatas,
clear_old=False,
auto_config=autoconfig,
**conn_opts,
) )
except AssertionError: except AssertionError:
if autoconfig: if autoconfig:
@ -159,3 +213,12 @@ class TestBasic:
raise raise
output = docsearch.similarity_search("foo", k=10) output = docsearch.similarity_search("foo", k=10)
assert len(output) == 6 assert len(output) == 6
class TestHttp2:
    """Environment check for the optional HTTP/2 support of httpx."""

    def test_http2(self) -> None:
        """Warn, rather than fail, when HTTP/2 support is not installed."""
        try:
            # Probe only: close the client right away so it is not leaked.
            httpx.Client(http2=True).close()
        except ImportError:
            # httpx raises ImportError when the `h2` extra is missing.
            warnings.warn('pip install "httpx[http2]" if you need HTTP/2')