community[minor]: VectorStore Infinispan. Adding TLS and authentication (#23522)
**Description:** This PR enables VectorStore TLS and authentication (digest, basic) with HTTP/2 for the Infinispan server. It is based on httpx. Docker Compose facilities for testing and documentation have been added. **Dependencies:** requires `pip install httpx[http2]` if HTTP/2 is needed. **Twitter handle:** https://twitter.com/infinispan
This commit is contained in: parent ff925d2ddc, commit 7da2efd9d3
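For orientation, a minimal usage sketch of the new connection options (not part of the diff; the embedding class is a stand-in and the credentials/ports are the ones used by the bundled test setup, which must be running):

```python
from typing import List

from langchain_community.vectorstores import InfinispanVS
from langchain_core.embeddings import Embeddings


class ToyEmbeddings(Embeddings):
    """Tiny deterministic embedding, only for illustration."""

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [[float(len(t)), 1.0, 0.0] for t in texts]

    def embed_query(self, text: str) -> List[float]:
        return [float(len(text)), 1.0, 0.0]


# Digest auth over plain HTTP (port 11232 in the docker-compose test server);
# use schema="https", hosts=["localhost:11242"], verify=False for the TLS endpoint.
store = InfinispanVS.from_texts(
    texts=["foo", "bar"],
    embedding=ToyEmbeddings(),
    metadatas=[{"page": 0}, {"page": 1}],
    user="user",
    password="password",
    hosts=["localhost:11232"],
    schema="http",
)
print(store.similarity_search("foo", k=1))
```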
@@ -5,9 +5,10 @@ from __future__ import annotations
 import json
 import logging
 import uuid
-from typing import Any, Iterable, List, Optional, Tuple, Type, cast
+import warnings
+from typing import Any, Iterable, List, Optional, Tuple, Type, Union, cast

-import requests
+from httpx import Response
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
 from langchain_core.vectorstores import VectorStore
@@ -49,7 +50,7 @@ class InfinispanVS(VectorStore):
             embedding=RGBEmbeddings(),
             output_fields: ["texture", "color"],
             lambda_key: lambda text,meta: str(meta["_key"]),
-            lambda_content: lambda item: item["color"]})
+            lambda_content: lambda item: item["color"])
     """

     def __init__(
@@ -58,13 +59,48 @@ class InfinispanVS(VectorStore):
         ids: Optional[List[str]] = None,
         **kwargs: Any,
     ):
+        """
+        Parameters
+        ----------
+        cache_name: str
+            Embeddings cache name. Default "vector"
+        entity_name: str
+            Protobuf entity name for the embeddings. Default "vector"
+        text_field: str
+            Protobuf field name for text. Default "text"
+        vector_field: str
+            Protobuf field name for vector. Default "vector"
+        lambda_content: lambda
+            Lambda returning the content part of an item. Default returns text_field
+        lambda_metadata: lambda
+            Lambda returning the metadata part of an item. Default returns items
+            fields excepts text_field, vector_field, _type
+        output_fields: List[str]
+            List of fields to be returned from item, if None return all fields.
+            Default None
+        kwargs: Any
+            Rest of arguments passed to Infinispan. See docs"""
         self.ispn = Infinispan(**kwargs)
         self._configuration = kwargs
         self._cache_name = str(self._configuration.get("cache_name", "vector"))
         self._entity_name = str(self._configuration.get("entity_name", "vector"))
         self._embedding = embedding
-        self._textfield = self._configuration.get("textfield", "text")
-        self._vectorfield = self._configuration.get("vectorfield", "vector")
+        self._textfield = self._configuration.get("textfield", "")
+        if self._textfield == "":
+            self._textfield = self._configuration.get("text_field", "text")
+        else:
+            warnings.warn(
+                "`textfield` is deprecated. Please use `text_field` " "param.",
+                DeprecationWarning,
+            )
+        self._vectorfield = self._configuration.get("vectorfield", "")
+        if self._vectorfield == "":
+            self._vectorfield = self._configuration.get("vector_field", "vector")
+        else:
+            warnings.warn(
+                "`vectorfield` is deprecated. Please use `vector_field` " "param.",
+                DeprecationWarning,
+            )
         self._to_content = self._configuration.get(
             "lambda_content", lambda item: self._default_content(item)
         )
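A short illustrative sketch of the renamed parameters and the deprecation path (hypothetical values; `ToyEmbeddings` stands for any `Embeddings` implementation, e.g. the toy class in the first sketch):

```python
import warnings

# New spelling of the Protobuf field parameters.
store = InfinispanVS(embedding=ToyEmbeddings(), text_field="text", vector_field="vector")

# The legacy spellings still work but now emit a DeprecationWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    InfinispanVS(embedding=ToyEmbeddings(), textfield="text")
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```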
@@ -121,7 +157,7 @@ repeated float %s = 1;
         metadata_proto += "}\n"
         return metadata_proto

-    def schema_create(self, proto: str) -> requests.Response:
+    def schema_create(self, proto: str) -> Response:
         """Deploy the schema for the vector db
         Args:
             proto(str): protobuf schema
@@ -130,14 +166,14 @@ repeated float %s = 1;
         """
         return self.ispn.schema_post(self._entity_name + ".proto", proto)

-    def schema_delete(self) -> requests.Response:
+    def schema_delete(self) -> Response:
         """Delete the schema for the vector db
         Returns:
             An http Response containing the result of the operation
         """
         return self.ispn.schema_delete(self._entity_name + ".proto")

-    def cache_create(self, config: str = "") -> requests.Response:
+    def cache_create(self, config: str = "") -> Response:
         """Create the cache for the vector db
         Args:
             config(str): configuration of the cache.
@@ -172,14 +208,14 @@ repeated float %s = 1;
         )
         return self.ispn.cache_post(self._cache_name, config)

-    def cache_delete(self) -> requests.Response:
+    def cache_delete(self) -> Response:
         """Delete the cache for the vector db
         Returns:
             An http Response containing the result of the operation
         """
         return self.ispn.cache_delete(self._cache_name)

-    def cache_clear(self) -> requests.Response:
+    def cache_clear(self) -> Response:
         """Clear the cache for the vector db
         Returns:
             An http Response containing the result of the operation
@@ -193,14 +229,14 @@ repeated float %s = 1;
         """
         return self.ispn.cache_exists(self._cache_name)

-    def cache_index_clear(self) -> requests.Response:
+    def cache_index_clear(self) -> Response:
         """Clear the index for the vector db
         Returns:
             An http Response containing the result of the operation
         """
         return self.ispn.index_clear(self._cache_name)

-    def cache_index_reindex(self) -> requests.Response:
+    def cache_index_reindex(self) -> Response:
         """Rebuild the for the vector db
         Returns:
             An http Response containing the result of the operation
@@ -325,12 +361,16 @@ repeated float %s = 1;
     def configure(self, metadata: dict, dimension: int) -> None:
         schema = self.schema_builder(metadata, dimension)
         output = self.schema_create(schema)
-        assert output.ok, "Unable to create schema. Already exists? "
+        assert (
+            output.status_code == self.ispn.Codes.OK
+        ), "Unable to create schema. Already exists? "
         "Consider using clear_old=True"
         assert json.loads(output.text)["error"] is None
         if not self.cache_exists():
             output = self.cache_create()
-            assert output.ok, "Unable to create cache. Already exists? "
+            assert (
+                output.status_code == self.ispn.Codes.OK
+            ), "Unable to create cache. Already exists? "
             "Consider using clear_old=True"
             # Ensure index is clean
             self.cache_index_clear()
@@ -350,7 +390,24 @@ repeated float %s = 1;
         auto_config: Optional[bool] = True,
         **kwargs: Any,
     ) -> InfinispanVS:
-        """Return VectorStore initialized from texts and embeddings."""
+        """Return VectorStore initialized from texts and embeddings.
+
+        In addition to parameters described by the super method, this
+        implementation provides other configuration params if different
+        configuration from default is needed.
+
+        Parameters
+        ----------
+        ids : List[str]
+            Additional list of keys associated to the embedding. If not
+            provided UUIDs will be generated
+        clear_old : bool
+            Whether old data must be deleted. Default True
+        auto_config: bool
+            Whether to do a complete server setup (caches,
+            protobuf definition...). Default True
+        kwargs: Any
+            Rest of arguments passed to InfinispanVS. See docs"""
         infinispanvs = cls(embedding=embedding, ids=ids, **kwargs)
         if auto_config and len(metadatas or []) > 0:
             if clear_old:
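A hedged sketch of these `from_texts` options (placeholder values; assumes a reachable Infinispan server such as the Docker Compose one added by this PR, and `ToyEmbeddings` stands for any `Embeddings` implementation):

```python
texts = ["foo", "bar", "baz"]
store = InfinispanVS.from_texts(
    texts=texts,
    embedding=ToyEmbeddings(),
    metadatas=[{"page": i} for i in range(len(texts))],
    ids=["id_" + str(i) for i in range(len(texts))],  # explicit keys instead of generated UUIDs
    clear_old=True,    # wipe previously stored entries
    auto_config=True,  # deploy the protobuf schema and create the cache if needed
)
```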
@@ -381,20 +438,83 @@ class Infinispan:
     https://github.com/rigazilla/infinispan-vector#run-infinispan
     """

-    def __init__(self, **kwargs: Any):
-        self._configuration = kwargs
-        self._schema = str(self._configuration.get("schema", "http"))
-        self._host = str(self._configuration.get("hosts", ["127.0.0.1:11222"])[0])
-        self._default_node = self._schema + "://" + self._host
-        self._cache_url = str(self._configuration.get("cache_url", "/rest/v2/caches"))
-        self._schema_url = str(self._configuration.get("cache_url", "/rest/v2/schemas"))
-        self._use_post_for_query = str(
-            self._configuration.get("use_post_for_query", True)
-        )
-
-    def req_query(
-        self, query: str, cache_name: str, local: bool = False
-    ) -> requests.Response:
+    def __init__(
+        self,
+        schema: str = "http",
+        user: str = "",
+        password: str = "",
+        hosts: List[str] = ["127.0.0.1:11222"],
+        cache_url: str = "/rest/v2/caches",
+        schema_url: str = "/rest/v2/schemas",
+        use_post_for_query: bool = True,
+        http2: bool = True,
+        verify: bool = True,
+        **kwargs: Any,
+    ):
+        """
+        Parameters
+        ----------
+        schema: str
+            Schema for HTTP request: "http" or "https". Default "http"
+        user, password: str
+            User and password if auth is required. Default None
+        hosts: List[str]
+            List of server addresses. Default ["127.0.0.1:11222"]
+        cache_url: str
+            URL endpoint for cache API. Default "/rest/v2/caches"
+        schema_url: str
+            URL endpoint for schema API. Default "/rest/v2/schemas"
+        use_post_for_query: bool
+            Whether POST method should be used for query. Default True
+        http2: bool
+            Whether HTTP/2 protocol should be used. `pip install "httpx[http2]"` is
+            needed for HTTP/2. Default True
+        verify: bool
+            Whether TLS certificate must be verified. Default True
+        """
+
+        try:
+            import httpx
+        except ImportError:
+            raise ImportError(
+                "Could not import httpx python package. "
+                "Please install it with `pip install httpx`"
+                'or `pip install "httpx[http2]"` if you need HTTP/2.'
+            )
+
+        self.Codes = httpx.codes
+
+        self._configuration = kwargs
+        self._schema = schema
+        self._user = user
+        self._password = password
+        self._host = hosts[0]
+        self._default_node = self._schema + "://" + self._host
+        self._cache_url = cache_url
+        self._schema_url = schema_url
+        self._use_post_for_query = use_post_for_query
+        self._http2 = http2
+        if self._user and self._password:
+            if self._schema == "http":
+                auth: Union[Tuple[str, str], httpx.DigestAuth] = httpx.DigestAuth(
+                    username=self._user, password=self._password
+                )
+            else:
+                auth = (self._user, self._password)
+            self._h2c = httpx.Client(
+                http2=self._http2,
+                http1=not self._http2,
+                auth=auth,
+                verify=verify,
+            )
+        else:
+            self._h2c = httpx.Client(
+                http2=self._http2,
+                http1=not self._http2,
+                verify=verify,
+            )
+
+    def req_query(self, query: str, cache_name: str, local: bool = False) -> Response:
         """Request a query
         Args:
             query(str): query requested
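For illustration, a sketch of driving the low-level `Infinispan` REST client directly; digest auth is picked automatically for plain HTTP and basic auth for HTTPS, per the constructor above. Host, credentials and cache name are placeholders from the test setup:

```python
ispn = Infinispan(
    schema="https",
    user="user",
    password="password",
    hosts=["localhost:11242"],
    verify=False,  # the test server generates a self-signed certificate
    http2=True,    # requires `pip install "httpx[http2]"`
)

# HEAD request under the hood; True if the cache already exists.
print(ispn.cache_exists("vector"))

# The other helpers (get/post/put, schema_post, cache_post, ...) return an
# httpx Response whose status can be compared against ispn.Codes.
resp = ispn.get("my-key", "vector")
print(resp.status_code == ispn.Codes.OK)
```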
@@ -409,7 +529,7 @@ class Infinispan:

     def _query_post(
         self, query_str: str, cache_name: str, local: bool = False
-    ) -> requests.Response:
+    ) -> Response:
         api_url = (
             self._default_node
             + self._cache_url
@@ -420,9 +540,9 @@ class Infinispan:
         )
         data = {"query": query_str}
         data_json = json.dumps(data)
-        response = requests.post(
+        response = self._h2c.post(
             api_url,
-            data_json,
+            content=data_json,
             headers={"Content-Type": "application/json"},
             timeout=REST_TIMEOUT,
         )
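An explanatory aside on the switch from `requests` to `httpx` seen in this and the following hunks: `httpx` does not accept a raw body as the second positional argument, so payloads are passed via `content=`. A minimal standalone sketch (placeholder URL and payload):

```python
import httpx

client = httpx.Client(http2=True, http1=False)  # HTTP/2 needs `pip install "httpx[http2]"`
resp = client.post(
    "http://localhost:11222/rest/v2/caches/vector/k1",
    content='{"_type": "vector", "text": "foo"}',  # raw JSON body
    headers={"Content-Type": "application/json"},
    timeout=10,
)
print(resp.status_code)
```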
@@ -430,7 +550,7 @@ class Infinispan:

     def _query_get(
         self, query_str: str, cache_name: str, local: bool = False
-    ) -> requests.Response:
+    ) -> Response:
         api_url = (
             self._default_node
             + self._cache_url
@@ -441,10 +561,10 @@ class Infinispan:
             + "&local="
             + str(local)
         )
-        response = requests.get(api_url, timeout=REST_TIMEOUT)
+        response = self._h2c.get(api_url, timeout=REST_TIMEOUT)
         return response

-    def post(self, key: str, data: str, cache_name: str) -> requests.Response:
+    def post(self, key: str, data: str, cache_name: str) -> Response:
         """Post an entry
         Args:
             key(str): key of the entry
@@ -454,15 +574,15 @@ class Infinispan:
             An http Response containing the result of the operation
         """
         api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
-        response = requests.post(
+        response = self._h2c.post(
             api_url,
-            data,
+            content=data,
             headers={"Content-Type": "application/json"},
             timeout=REST_TIMEOUT,
         )
         return response

-    def put(self, key: str, data: str, cache_name: str) -> requests.Response:
+    def put(self, key: str, data: str, cache_name: str) -> Response:
         """Put an entry
         Args:
             key(str): key of the entry
@@ -472,15 +592,15 @@ class Infinispan:
             An http Response containing the result of the operation
         """
         api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
-        response = requests.put(
+        response = self._h2c.put(
             api_url,
-            data,
+            content=data,
             headers={"Content-Type": "application/json"},
             timeout=REST_TIMEOUT,
         )
         return response

-    def get(self, key: str, cache_name: str) -> requests.Response:
+    def get(self, key: str, cache_name: str) -> Response:
         """Get an entry
         Args:
             key(str): key of the entry
@@ -489,12 +609,12 @@ class Infinispan:
             An http Response containing the entry or errors
         """
         api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
-        response = requests.get(
+        response = self._h2c.get(
             api_url, headers={"Content-Type": "application/json"}, timeout=REST_TIMEOUT
         )
         return response

-    def schema_post(self, name: str, proto: str) -> requests.Response:
+    def schema_post(self, name: str, proto: str) -> Response:
         """Deploy a schema
         Args:
             name(str): name of the schema. Will be used as a key
@@ -503,10 +623,10 @@ class Infinispan:
             An http Response containing the result of the operation
         """
         api_url = self._default_node + self._schema_url + "/" + name
-        response = requests.post(api_url, proto, timeout=REST_TIMEOUT)
+        response = self._h2c.post(api_url, content=proto, timeout=REST_TIMEOUT)
         return response

-    def cache_post(self, name: str, config: str) -> requests.Response:
+    def cache_post(self, name: str, config: str) -> Response:
         """Create a cache
         Args:
             name(str): name of the cache.
@@ -515,15 +635,15 @@ class Infinispan:
             An http Response containing the result of the operation
         """
         api_url = self._default_node + self._cache_url + "/" + name
-        response = requests.post(
+        response = self._h2c.post(
             api_url,
-            config,
+            content=config,
             headers={"Content-Type": "application/json"},
             timeout=REST_TIMEOUT,
         )
         return response

-    def schema_delete(self, name: str) -> requests.Response:
+    def schema_delete(self, name: str) -> Response:
         """Delete a schema
         Args:
             name(str): name of the schema.
@@ -531,10 +651,10 @@ class Infinispan:
             An http Response containing the result of the operation
         """
         api_url = self._default_node + self._schema_url + "/" + name
-        response = requests.delete(api_url, timeout=REST_TIMEOUT)
+        response = self._h2c.delete(api_url, timeout=REST_TIMEOUT)
         return response

-    def cache_delete(self, name: str) -> requests.Response:
+    def cache_delete(self, name: str) -> Response:
         """Delete a cache
         Args:
             name(str): name of the cache.
@@ -542,10 +662,10 @@ class Infinispan:
             An http Response containing the result of the operation
         """
         api_url = self._default_node + self._cache_url + "/" + name
-        response = requests.delete(api_url, timeout=REST_TIMEOUT)
+        response = self._h2c.delete(api_url, timeout=REST_TIMEOUT)
         return response

-    def cache_clear(self, cache_name: str) -> requests.Response:
+    def cache_clear(self, cache_name: str) -> Response:
         """Clear a cache
         Args:
             cache_name(str): name of the cache.
@@ -555,7 +675,7 @@ class Infinispan:
         api_url = (
             self._default_node + self._cache_url + "/" + cache_name + "?action=clear"
         )
-        response = requests.post(api_url, timeout=REST_TIMEOUT)
+        response = self._h2c.post(api_url, timeout=REST_TIMEOUT)
         return response

     def cache_exists(self, cache_name: str) -> bool:
@@ -570,18 +690,17 @@ class Infinispan:
         )
         return self.resource_exists(api_url)

-    @staticmethod
-    def resource_exists(api_url: str) -> bool:
+    def resource_exists(self, api_url: str) -> bool:
         """Check if a resource exists
         Args:
             api_url(str): url of the resource.
         Returns:
             true if resource exists
         """
-        response = requests.head(api_url, timeout=REST_TIMEOUT)
-        return response.ok
+        response = self._h2c.head(api_url, timeout=REST_TIMEOUT)
+        return response.status_code == self.Codes.OK

-    def index_clear(self, cache_name: str) -> requests.Response:
+    def index_clear(self, cache_name: str) -> Response:
         """Clear an index on a cache
         Args:
             cache_name(str): name of the cache.
@@ -595,9 +714,9 @@ class Infinispan:
             + cache_name
             + "/search/indexes?action=clear"
         )
-        return requests.post(api_url, timeout=REST_TIMEOUT)
+        return self._h2c.post(api_url, timeout=REST_TIMEOUT)

-    def index_reindex(self, cache_name: str) -> requests.Response:
+    def index_reindex(self, cache_name: str) -> Response:
         """Rebuild index on a cache
         Args:
             cache_name(str): name of the cache.
@@ -611,4 +730,4 @@ class Infinispan:
             + cache_name
             + "/search/indexes?action=reindex"
         )
-        return requests.post(api_url, timeout=REST_TIMEOUT)
+        return self._h2c.post(api_url, timeout=REST_TIMEOUT)
New file (4 lines), a helper script that starts the test server via Docker Compose:
@@ -0,0 +1,4 @@
+#/bin/sh
+
+cd infinispan
+docker compose up

New file (2 lines), group mapping properties for the test realm:
@@ -0,0 +1,2 @@
+#Fri May 03 10:19:58 CEST 2024
+user=ADMIN,admin
New file (62 lines), the Infinispan server configuration defining three endpoints (anonymous, authenticated, authenticated over TLS):
@@ -0,0 +1,62 @@
+<infinispan
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="urn:infinispan:config:15.0 https://infinispan.org/schemas/infinispan-config-15.0.xsd
+                        urn:infinispan:server:15.0 https://infinispan.org/schemas/infinispan-server-15.0.xsd"
+    xmlns="urn:infinispan:config:15.0"
+    xmlns:server="urn:infinispan:server:15.0">
+
+    <cache-container name="default" statistics="true">
+        <transport cluster="${infinispan.cluster.name:cluster}" stack="${infinispan.cluster.stack:tcp}" node-name="${infinispan.node.name:}"/>
+    </cache-container>
+
+    <server xmlns="urn:infinispan:server:15.0">
+        <interfaces>
+            <interface name="public">
+                <inet-address value="${infinispan.bind.address:127.0.0.1}"/>
+            </interface>
+        </interfaces>
+
+        <socket-bindings default-interface="public" port-offset="${infinispan.socket.binding.port-offset:0}">
+            <socket-binding name="default" port="${infinispan.bind.port:11222}"/>
+            <socket-binding name="authenticated" port="11232"/>
+            <socket-binding name="auth-tls" port="11242"/>
+        </socket-bindings>
+
+        <security>
+            <credential-stores>
+                <credential-store name="credentials" path="credentials.pfx">
+                    <clear-text-credential clear-text="secret"/>
+                </credential-store>
+            </credential-stores>
+            <security-realms>
+                <security-realm name="default">
+                    <properties-realm groups-attribute="Roles">
+                        <user-properties path="/user-config/users.properties"/>
+                        <group-properties path="/user-config/groups.properties"/>
+                    </properties-realm>
+                </security-realm>
+                <security-realm name="tls">
+                    <!-- Uncomment to enable TLS on the realm -->
+                    <server-identities>
+                        <ssl>
+                            <keystore path="application.keystore"
+                                      password="password" alias="server"
+                                      generate-self-signed-certificate-host="localhost"/>
+
+                        </ssl>
+                    </server-identities>
+                    <properties-realm groups-attribute="Roles">
+                        <user-properties path="/user-config/users.properties"/>
+                        <group-properties path="/user-config/groups.properties"/>
+                    </properties-realm>
+                </security-realm>
+            </security-realms>
+        </security>
+
+        <endpoints>
+            <endpoint socket-binding="default"/>
+            <endpoint socket-binding="authenticated" security-realm="default"/>
+            <endpoint socket-binding="auth-tls" security-realm="tls"/>
+        </endpoints>
+    </server>
+</infinispan>
New file (4 lines), user credentials for the test realm:
@@ -0,0 +1,4 @@
+#$REALM_NAME=default$
+#$ALGORITHM=encrypted$
+#Fri May 03 10:19:58 CEST 2024
+user=scram-sha-1\:BYGcIAws2gznU/kpezoSb1VQNVd+YMX9r+9SAINFoZtPHaHTAQ\=\=;scram-sha-256\:BYGcIAwRiWiD+8f7dyQEs1Wsum/64MOcjGJ2UcmZFQB6DZJqwRDJ4NrvII4NttmxlA\=\=;scram-sha-384\:BYGcIAz+Eud65N8GWK4TMwhSCZpeE5EFSdynywdryQj3ZwBEgv+KF8hRUuGxiq3EyRxsby6w7DHK3CICGZLsPrM\=;scram-sha-512\:BYGcIAwWxVY9DHn42kHydivyU3s9LSPmyfPPJkIFYyt/XsMASFHGoy5rzk4ahX4HjpJgb+NjdCwhGfi33CY0azUIrn439s62Yg5mq9i+ISto;digest-md5\:AgR1c2VyB2RlZmF1bHSYYyzPjRDR7MhrsdFSK03P;digest-sha\:AgR1c2VyB2RlZmF1bHTga5gDNnNYh7/2HqhBVOdUHjBzhw\=\=;digest-sha-256\:AgR1c2VyB2RlZmF1bHTig5qZQIxqtJBTUp3EMh5UIFoS4qOhz9Uk5aOW9ZKCfw\=\=;digest-sha-384\:AgR1c2VyB2RlZmF1bHT01pAN/pRMLS5afm4Q9S0kuLlA0NokuP8F0AISTwXCb1E8RMsFHlBVPOa5rC6Nyso\=;digest-sha-512\:AgR1c2VyB2RlZmF1bHTi+cHn1Ez2Ze41CvPXb9eP/7JmRys7m1f5qPMQWhAmDOuuUXNWEG4yKSI9k2EZgQvMKTd5hDbR24ul1BsYP8X5;

New file (16 lines), the Docker Compose service publishing the three ports and mounting the configuration:
@@ -0,0 +1,16 @@
+version: "3.7"
+
+services:
+  infinispan:
+    image: quay.io/infinispan/server:15.0
+    ports:
+      - '11222:11222'
+      - '11232:11232'
+      - '11242:11242'
+    deploy:
+      resources:
+        limits:
+          memory: 25Gb
+    volumes:
+      - ./conf:/user-config
+    command: -c /user-config/infinispan.xml
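For illustration, a quick connectivity check against the three endpoints defined above once `docker compose up` is running (the credentials and the self-signed certificate are specific to this test configuration):

```python
import httpx

# Anonymous endpoint.
anon = httpx.get("http://localhost:11222/rest/v2/caches")

# Digest-authenticated endpoint.
auth = httpx.get(
    "http://localhost:11232/rest/v2/caches",
    auth=httpx.DigestAuth("user", "password"),
)

# TLS endpoint with basic auth; the server generates a self-signed certificate.
tls = httpx.get(
    "https://localhost:11242/rest/v2/caches",
    auth=("user", "password"),
    verify=False,
)

print(anon.status_code, auth.status_code, tls.status_code)
```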
@@ -1,7 +1,9 @@
 """Test Infinispan functionality."""

+import warnings
 from typing import Any, List, Optional

+import httpx
 import pytest
 from langchain_core.documents import Document

@@ -11,9 +13,18 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
     fake_texts,
 )

+"""
+cd tests/integration_tests/vectorstores/docker-compose
+./infinispan.sh
+
+Current Infinispan implementation relies on httpx: `pip install "httpx[http2]"`
+if not installed. HTTP/2 is enable by default, if it's not
+wanted use `pip install "httpx"`.
+"""

-def _infinispan_setup_noautoconf() -> None:
-    ispnvs = InfinispanVS(auto_config=False)
+
+def _infinispan_setup_noautoconf(**kwargs: Any) -> None:
+    ispnvs = InfinispanVS(http2=_hasHttp2(), auto_config=False, **kwargs)
     ispnvs.cache_delete()
     ispnvs.schema_delete()
     proto = """
@@ -54,64 +65,104 @@ def _infinispanvs_from_texts(
         ids=ids,
         clear_old=clear_old,
         auto_config=auto_config,
+        http2=_hasHttp2(),
         **kwargs,
     )


+def _hasHttp2() -> bool:
+    try:
+        httpx.Client(http2=True)
+        return True
+    except Exception:
+        return False
+
+
 @pytest.mark.parametrize("autoconfig", [False, True])
+@pytest.mark.parametrize(
+    "conn_opts",
+    [
+        {},
+        {
+            "user": "user",
+            "password": "password",
+            "hosts": ["localhost:11232"],
+            "schema": "http",
+        },
+        {
+            "user": "user",
+            "password": "password",
+            "hosts": ["localhost:11242"],
+            "schema": "https",
+            "verify": False,
+        },
+    ],
+)
 class TestBasic:
-    def test_infinispan(self, autoconfig: bool) -> None:
+    def test_infinispan(self, autoconfig: bool, conn_opts: dict) -> None:
         """Test end to end construction and search."""
         if not autoconfig:
-            _infinispan_setup_noautoconf()
+            _infinispan_setup_noautoconf(**conn_opts)
-        docsearch = _infinispanvs_from_texts(auto_config=autoconfig)
+        docsearch = _infinispanvs_from_texts(auto_config=autoconfig, **conn_opts)
         output = docsearch.similarity_search("foo", k=1)
         assert output == [Document(page_content="foo")]

-    def test_infinispan_with_metadata(self, autoconfig: bool) -> None:
+    def test_infinispan_with_auth(self, autoconfig: bool, conn_opts: dict) -> None:
+        """Test end to end construction and search."""
+        if not autoconfig:
+            _infinispan_setup_noautoconf(**conn_opts)
+        docsearch = _infinispanvs_from_texts(auto_config=autoconfig, **conn_opts)
+        output = docsearch.similarity_search("foo", k=1)
+        assert output == [Document(page_content="foo")]
+
+    def test_infinispan_with_metadata(self, autoconfig: bool, conn_opts: dict) -> None:
         """Test with metadata"""
         if not autoconfig:
-            _infinispan_setup_noautoconf()
+            _infinispan_setup_noautoconf(**conn_opts)
         meta = []
         for _ in range(len(fake_texts)):
             meta.append({"label": "test"})
-        docsearch = _infinispanvs_from_texts(metadatas=meta, auto_config=autoconfig)
+        docsearch = _infinispanvs_from_texts(
+            metadatas=meta, auto_config=autoconfig, **conn_opts
+        )
         output = docsearch.similarity_search("foo", k=1)
         assert output == [Document(page_content="foo", metadata={"label": "test"})]

     def test_infinispan_with_metadata_with_output_fields(
-        self, autoconfig: bool
+        self, autoconfig: bool, conn_opts: dict
     ) -> None:
         """Test with metadata"""
         if not autoconfig:
-            _infinispan_setup_noautoconf()
+            _infinispan_setup_noautoconf(**conn_opts)
         metadatas = [
             {"page": i, "label": "label" + str(i)} for i in range(len(fake_texts))
         ]
         c = {"output_fields": ["label", "page", "text"]}
         docsearch = _infinispanvs_from_texts(
-            metadatas=metadatas, configuration=c, auto_config=autoconfig
+            metadatas=metadatas, configuration=c, auto_config=autoconfig, **conn_opts
         )
         output = docsearch.similarity_search("foo", k=1)
         assert output == [
             Document(page_content="foo", metadata={"label": "label0", "page": 0})
         ]

-    def test_infinispanvs_with_id(self, autoconfig: bool) -> None:
+    def test_infinispanvs_with_id(self, autoconfig: bool, conn_opts: dict) -> None:
         """Test with ids"""
         ids = ["id_" + str(i) for i in range(len(fake_texts))]
-        docsearch = _infinispanvs_from_texts(ids=ids, auto_config=autoconfig)
+        docsearch = _infinispanvs_from_texts(
+            ids=ids, auto_config=autoconfig, **conn_opts
+        )
         output = docsearch.similarity_search("foo", k=1)
         assert output == [Document(page_content="foo")]

-    def test_infinispan_with_score(self, autoconfig: bool) -> None:
+    def test_infinispan_with_score(self, autoconfig: bool, conn_opts: dict) -> None:
         """Test end to end construction and search with scores and IDs."""
         if not autoconfig:
-            _infinispan_setup_noautoconf()
+            _infinispan_setup_noautoconf(**conn_opts)
         texts = ["foo", "bar", "baz"]
         metadatas = [{"page": i} for i in range(len(texts))]
         docsearch = _infinispanvs_from_texts(
-            metadatas=metadatas, auto_config=autoconfig
+            metadatas=metadatas, auto_config=autoconfig, **conn_opts
         )
         output = docsearch.similarity_search_with_score("foo", k=3)
         docs = [o[0] for o in output]
@@ -123,14 +174,14 @@ class TestBasic:
         ]
         assert scores[0] >= scores[1] >= scores[2]

-    def test_infinispan_add_texts(self, autoconfig: bool) -> None:
+    def test_infinispan_add_texts(self, autoconfig: bool, conn_opts: dict) -> None:
         """Test end to end construction and MRR search."""
         if not autoconfig:
-            _infinispan_setup_noautoconf()
+            _infinispan_setup_noautoconf(**conn_opts)
         texts = ["foo", "bar", "baz"]
         metadatas = [{"page": i} for i in range(len(texts))]
         docsearch = _infinispanvs_from_texts(
-            metadatas=metadatas, auto_config=autoconfig
+            metadatas=metadatas, auto_config=autoconfig, **conn_opts
         )

         docsearch.add_texts(texts, metadatas)
@@ -138,19 +189,22 @@ class TestBasic:
         output = docsearch.similarity_search("foo", k=10)
         assert len(output) == 6

-    def test_infinispan_no_clear_old(self, autoconfig: bool) -> None:
+    def test_infinispan_no_clear_old(self, autoconfig: bool, conn_opts: dict) -> None:
         """Test end to end construction and MRR search."""
         if not autoconfig:
-            _infinispan_setup_noautoconf()
+            _infinispan_setup_noautoconf(**conn_opts)
         texts = ["foo", "bar", "baz"]
         metadatas = [{"page": i} for i in range(len(texts))]
         docsearch = _infinispanvs_from_texts(
-            metadatas=metadatas, auto_config=autoconfig
+            metadatas=metadatas, auto_config=autoconfig, **conn_opts
         )
         del docsearch
         try:
             docsearch = _infinispanvs_from_texts(
-                metadatas=metadatas, clear_old=False, auto_config=autoconfig
+                metadatas=metadatas,
+                clear_old=False,
+                auto_config=autoconfig,
+                **conn_opts,
             )
         except AssertionError:
             if autoconfig:
@@ -159,3 +213,12 @@ class TestBasic:
                 raise
         output = docsearch.similarity_search("foo", k=10)
         assert len(output) == 6
+
+
+class TestHttp2:
+    def test_http2(self) -> None:
+        try:
+            httpx.Client(http2=True)
+        except Exception:
+            warnings.warn('pip install "httpx[http2]" if you need HTTP/2')
+            pass