mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 21:47:12 +00:00
community[patch]: add hybrid search to singlestoredb vectorstore (#20793)
Implemented the ability to enable full-text search within the SingleStore vector store, offering users a versatile range of search strategies. This enhancement allows users to seamlessly combine full-text search with vector search, enabling the following search strategies: * Search solely by vector similarity. * Conduct searches exclusively based on text similarity, utilizing Lucene internally. * Filter search results by text similarity score, with the option to specify a threshold, followed by a search based on vector similarity. * Filter results by vector similarity score before conducting a search based on text similarity. * Perform searches using a weighted sum of vector and text similarity scores. Additionally, integration tests have been added to comprehensively cover all scenarios. Updated notebook with examples. CC: @baskaryan, @hwchase17 --------- Co-authored-by: Volodymyr Tkachuk <vtkachuk-ua@singlestore.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from enum import Enum
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
@@ -38,6 +39,15 @@ class SingleStoreDB(VectorStore):
|
||||
optionally, the names of the table and the fields to use.
|
||||
"""
|
||||
|
||||
class SearchStrategy(str, Enum):
    """Enumerator of the Search strategies for searching in the vectorstore.

    Members are plain strings (the class mixes in ``str``), so a member
    compares equal to its own name, e.g.
    ``SearchStrategy.TEXT_ONLY == "TEXT_ONLY"``.
    """

    # Search only by vector similarity.
    VECTOR_ONLY = "VECTOR_ONLY"
    # Search only by text similarity (full-text index).
    TEXT_ONLY = "TEXT_ONLY"
    # Filter by text similarity score, then search by vector similarity.
    FILTER_BY_TEXT = "FILTER_BY_TEXT"
    # Filter by vector similarity score, then search by text similarity.
    FILTER_BY_VECTOR = "FILTER_BY_VECTOR"
    # Search by a weighted sum of the text and vector similarity scores.
    WEIGHTED_SUM = "WEIGHTED_SUM"
|
||||
|
||||
def _get_connection(self: SingleStoreDB) -> Any:
|
||||
try:
|
||||
import singlestoredb as s2
|
||||
@@ -57,10 +67,12 @@ class SingleStoreDB(VectorStore):
|
||||
content_field: str = "content",
|
||||
metadata_field: str = "metadata",
|
||||
vector_field: str = "vector",
|
||||
id_field: str = "id",
|
||||
use_vector_index: bool = False,
|
||||
vector_index_name: str = "",
|
||||
vector_index_options: Optional[dict] = None,
|
||||
vector_size: int = 1536,
|
||||
use_full_text_search: bool = False,
|
||||
pool_size: int = 5,
|
||||
max_overflow: int = 10,
|
||||
timeout: float = 30,
|
||||
@@ -81,7 +93,8 @@ class SingleStoreDB(VectorStore):
|
||||
- EUCLIDEAN_DISTANCE: Computes the Euclidean distance between
|
||||
two vectors. This metric considers the geometric distance in
|
||||
the vector space, and might be more suitable for embeddings
|
||||
that rely on spatial relationships.
|
||||
that rely on spatial relationships. This metric is not
|
||||
compatible with the WEIGHTED_SUM search strategy.
|
||||
|
||||
table_name (str, optional): Specifies the name of the table in use.
|
||||
Defaults to "embeddings".
|
||||
@@ -91,6 +104,8 @@ class SingleStoreDB(VectorStore):
|
||||
Defaults to "metadata".
|
||||
vector_field (str, optional): Specifies the field to store the vector.
|
||||
Defaults to "vector".
|
||||
id_field (str, optional): Specifies the field to store the id.
|
||||
Defaults to "id".
|
||||
|
||||
use_vector_index (bool, optional): Toggles the use of a vector index.
|
||||
Works only with SingleStoreDB 8.5 or later. Defaults to False.
|
||||
@@ -113,6 +128,14 @@ class SingleStoreDB(VectorStore):
|
||||
Should be set to the same value as the size of the vectors
|
||||
stored in the vector_field.
|
||||
|
||||
use_full_text_search (bool, optional): Toggles the use of a full-text index
|
||||
on the document content. Defaults to False. If set to True, the table
|
||||
will be created with a full-text index on the content field,
|
||||
and the similarity_search method will allow using TEXT_ONLY,
|
||||
FILTER_BY_TEXT, FILTER_BY_VECTOR, and WEIGHTED_SUM search strategies.
|
||||
If set to False, the similarity_search method will only allow
|
||||
VECTOR_ONLY search strategy.
|
||||
|
||||
Following arguments pertain to the connection pool:
|
||||
|
||||
pool_size (int, optional): Determines the number of active connections in
|
||||
@@ -165,7 +188,7 @@ class SingleStoreDB(VectorStore):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
|
||||
vectorstore = SingleStoreDB(
|
||||
@@ -177,7 +200,7 @@ class SingleStoreDB(VectorStore):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
|
||||
vectorstore = SingleStoreDB(
|
||||
@@ -197,7 +220,7 @@ class SingleStoreDB(VectorStore):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
|
||||
os.environ['SINGLESTOREDB_URL'] = 'me:p455w0rd@s2-host.com/my_db'
|
||||
@@ -207,7 +230,7 @@ class SingleStoreDB(VectorStore):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
|
||||
os.environ['SINGLESTOREDB_URL'] = 'me:p455w0rd@s2-host.com/my_db'
|
||||
@@ -215,6 +238,18 @@ class SingleStoreDB(VectorStore):
|
||||
OpenAIEmbeddings(),
|
||||
use_vector_index=True,
|
||||
)
|
||||
|
||||
Using full-text index:
|
||||
|
||||
.. code-block:: python
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
|
||||
os.environ['SINGLESTOREDB_URL'] = 'me:p455w0rd@s2-host.com/my_db'
|
||||
vectorstore = SingleStoreDB(
|
||||
OpenAIEmbeddings(),
|
||||
use_full_text_search=True,
|
||||
)
|
||||
"""
|
||||
|
||||
self.embedding = embedding
|
||||
@@ -223,6 +258,7 @@ class SingleStoreDB(VectorStore):
|
||||
self.content_field = self._sanitize_input(content_field)
|
||||
self.metadata_field = self._sanitize_input(metadata_field)
|
||||
self.vector_field = self._sanitize_input(vector_field)
|
||||
self.id_field = self._sanitize_input(id_field)
|
||||
|
||||
self.use_vector_index = bool(use_vector_index)
|
||||
self.vector_index_name = self._sanitize_input(vector_index_name)
|
||||
@@ -230,6 +266,8 @@ class SingleStoreDB(VectorStore):
|
||||
self.vector_index_options["metric_type"] = self.distance_strategy
|
||||
self.vector_size = int(vector_size)
|
||||
|
||||
self.use_full_text_search = bool(use_full_text_search)
|
||||
|
||||
# Pass the rest of the kwargs to the connection.
|
||||
self.connection_kwargs = kwargs
|
||||
|
||||
@@ -238,7 +276,7 @@ class SingleStoreDB(VectorStore):
|
||||
self.connection_kwargs["conn_attrs"] = dict()
|
||||
|
||||
self.connection_kwargs["conn_attrs"]["_connector_name"] = "langchain python sdk"
|
||||
self.connection_kwargs["conn_attrs"]["_connector_version"] = "1.0.2"
|
||||
self.connection_kwargs["conn_attrs"]["_connector_version"] = "2.0.0"
|
||||
|
||||
# Create connection pool.
|
||||
self.connection_pool = QueuePool(
|
||||
@@ -266,6 +304,9 @@ class SingleStoreDB(VectorStore):
|
||||
try:
|
||||
cur = conn.cursor()
|
||||
try:
|
||||
full_text_index = ""
|
||||
if self.use_full_text_search:
|
||||
full_text_index = ", FULLTEXT({})".format(self.content_field)
|
||||
if self.use_vector_index:
|
||||
index_options = ""
|
||||
if self.vector_index_options and len(self.vector_index_options) > 0:
|
||||
@@ -274,10 +315,11 @@ class SingleStoreDB(VectorStore):
|
||||
)
|
||||
cur.execute(
|
||||
"""CREATE TABLE IF NOT EXISTS {}
|
||||
({} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
|
||||
{} VECTOR({}, F32) NOT NULL, {} JSON,
|
||||
VECTOR INDEX {} ({}) {});""".format(
|
||||
({} BIGINT AUTO_INCREMENT PRIMARY KEY, {} LONGTEXT CHARACTER
|
||||
SET utf8mb4 COLLATE utf8mb4_general_ci, {} VECTOR({}, F32)
|
||||
NOT NULL, {} JSON, VECTOR INDEX {} ({}) {}{});""".format(
|
||||
self.table_name,
|
||||
self.id_field,
|
||||
self.content_field,
|
||||
self.vector_field,
|
||||
self.vector_size,
|
||||
@@ -285,17 +327,21 @@ class SingleStoreDB(VectorStore):
|
||||
self.vector_index_name,
|
||||
self.vector_field,
|
||||
index_options,
|
||||
full_text_index,
|
||||
),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
"""CREATE TABLE IF NOT EXISTS {}
|
||||
({} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
|
||||
{} BLOB, {} JSON);""".format(
|
||||
({} BIGINT AUTO_INCREMENT PRIMARY KEY, {} LONGTEXT CHARACTER
|
||||
SET utf8mb4 COLLATE utf8mb4_general_ci, {} BLOB, {} JSON{});
|
||||
""".format(
|
||||
self.table_name,
|
||||
self.id_field,
|
||||
self.content_field,
|
||||
self.vector_field,
|
||||
self.metadata_field,
|
||||
full_text_index,
|
||||
),
|
||||
)
|
||||
finally:
|
||||
@@ -365,8 +411,12 @@ class SingleStoreDB(VectorStore):
|
||||
else self.embedding.embed_documents([text])[0]
|
||||
)
|
||||
cur.execute(
|
||||
"INSERT INTO {} VALUES (%s, JSON_ARRAY_PACK(%s), %s)".format(
|
||||
self.table_name
|
||||
"""INSERT INTO {}({}, {}, {})
|
||||
VALUES (%s, JSON_ARRAY_PACK(%s), %s)""".format(
|
||||
self.table_name,
|
||||
self.content_field,
|
||||
self.vector_field,
|
||||
self.metadata_field,
|
||||
),
|
||||
(
|
||||
text,
|
||||
@@ -374,7 +424,7 @@ class SingleStoreDB(VectorStore):
|
||||
json.dumps(metadata),
|
||||
),
|
||||
)
|
||||
if self.use_vector_index:
|
||||
if self.use_vector_index or self.use_full_text_search:
|
||||
cur.execute("OPTIMIZE TABLE {} FLUSH;".format(self.table_name))
|
||||
finally:
|
||||
cur.close()
|
||||
@@ -383,7 +433,16 @@ class SingleStoreDB(VectorStore):
|
||||
return []
|
||||
|
||||
def similarity_search(
|
||||
self, query: str, k: int = 4, filter: Optional[dict] = None, **kwargs: Any
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
filter: Optional[dict] = None,
|
||||
search_strategy: SearchStrategy = SearchStrategy.VECTOR_ONLY,
|
||||
filter_threshold: float = 0,
|
||||
text_weight: float = 0.5,
|
||||
vector_weight: float = 0.5,
|
||||
vector_select_count_multiplier: int = 10,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Returns the most similar indexed documents to the query text.
|
||||
|
||||
@@ -393,29 +452,119 @@ class SingleStoreDB(VectorStore):
|
||||
query (str): The query text for which to find similar documents.
|
||||
k (int): The number of documents to return. Default is 4.
|
||||
filter (dict): A dictionary of metadata fields and values to filter by.
|
||||
Default is None.
|
||||
search_strategy (SearchStrategy): The search strategy to use.
|
||||
Default is SearchStrategy.VECTOR_ONLY.
|
||||
Available options are:
|
||||
- SearchStrategy.VECTOR_ONLY: Searches only by vector similarity.
|
||||
- SearchStrategy.TEXT_ONLY: Searches only by text similarity. This
|
||||
option is only available if use_full_text_search is True.
|
||||
- SearchStrategy.FILTER_BY_TEXT: Filters by text similarity and
|
||||
searches by vector similarity. This option is only available if
|
||||
use_full_text_search is True.
|
||||
- SearchStrategy.FILTER_BY_VECTOR: Filters by vector similarity and
|
||||
searches by text similarity. This option is only available if
|
||||
use_full_text_search is True.
|
||||
- SearchStrategy.WEIGHTED_SUM: Searches by a weighted sum of text and
|
||||
vector similarity. This option is only available if
|
||||
use_full_text_search is True and distance_strategy is DOT_PRODUCT.
|
||||
filter_threshold (float): The threshold for filtering by text or vector
|
||||
similarity. Default is 0. This option has effect only if search_strategy
|
||||
is SearchStrategy.FILTER_BY_TEXT or SearchStrategy.FILTER_BY_VECTOR.
|
||||
text_weight (float): The weight of text similarity in the weighted sum
|
||||
search strategy. Default is 0.5. This option has effect only if
|
||||
search_strategy is SearchStrategy.WEIGHTED_SUM.
|
||||
vector_weight (float): The weight of vector similarity in the weighted sum
|
||||
search strategy. Default is 0.5. This option has effect only if
|
||||
search_strategy is SearchStrategy.WEIGHTED_SUM.
|
||||
vector_select_count_multiplier (int): The multiplier for the number of
|
||||
vectors to select when using the vector index. Default is 10.
|
||||
This parameter has effect only if use_vector_index is True and
|
||||
search_strategy is SearchStrategy.WEIGHTED_SUM or
|
||||
SearchStrategy.FILTER_BY_TEXT.
|
||||
The number of vectors selected will
|
||||
be k * vector_select_count_multiplier.
|
||||
This is needed due to the limitations of the vector index.
|
||||
|
||||
|
||||
Returns:
|
||||
List[Document]: A list of documents that are most similar to the query text.
|
||||
|
||||
Examples:
|
||||
|
||||
Basic Usage:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
s2 = SingleStoreDB.from_documents(
|
||||
docs,
|
||||
OpenAIEmbeddings(),
|
||||
host="username:password@localhost:3306/database"
|
||||
)
|
||||
s2.similarity_search("query text", 1,
|
||||
{"metadata_field": "metadata_value"})
|
||||
results = s2.similarity_search("query text", 1,
|
||||
{"metadata_field": "metadata_value"})
|
||||
|
||||
Different Search Strategies:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
s2 = SingleStoreDB.from_documents(
|
||||
docs,
|
||||
OpenAIEmbeddings(),
|
||||
host="username:password@localhost:3306/database",
|
||||
use_full_text_search=True,
|
||||
use_vector_index=True,
|
||||
)
|
||||
results = s2.similarity_search("query text", 1,
|
||||
search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_TEXT,
|
||||
filter_threshold=0.5)
|
||||
|
||||
Weighted Sum Search Strategy:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
s2 = SingleStoreDB.from_documents(
|
||||
docs,
|
||||
OpenAIEmbeddings(),
|
||||
host="username:password@localhost:3306/database",
|
||||
use_full_text_search=True,
|
||||
use_vector_index=True,
|
||||
)
|
||||
results = s2.similarity_search("query text", 1,
|
||||
search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,
|
||||
text_weight=0.3,
|
||||
vector_weight=0.7)
|
||||
"""
|
||||
docs_and_scores = self.similarity_search_with_score(
|
||||
query=query, k=k, filter=filter
|
||||
query=query,
|
||||
k=k,
|
||||
filter=filter,
|
||||
search_strategy=search_strategy,
|
||||
filter_threshold=filter_threshold,
|
||||
text_weight=text_weight,
|
||||
vector_weight=vector_weight,
|
||||
vector_select_count_multiplier=vector_select_count_multiplier,
|
||||
**kwargs,
|
||||
)
|
||||
return [doc for doc, _ in docs_and_scores]
|
||||
|
||||
def similarity_search_with_score(
|
||||
self, query: str, k: int = 4, filter: Optional[dict] = None
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
filter: Optional[dict] = None,
|
||||
search_strategy: SearchStrategy = SearchStrategy.VECTOR_ONLY,
|
||||
filter_threshold: float = 1,
|
||||
text_weight: float = 0.5,
|
||||
vector_weight: float = 0.5,
|
||||
vector_select_count_multiplier: int = 10,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Return docs most similar to query. Uses cosine similarity.
|
||||
|
||||
@@ -424,20 +573,156 @@ class SingleStoreDB(VectorStore):
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
filter: A dictionary of metadata fields and values to filter by.
|
||||
Defaults to None.
|
||||
|
||||
search_strategy (SearchStrategy): The search strategy to use.
|
||||
Default is SearchStrategy.VECTOR_ONLY.
|
||||
Available options are:
|
||||
- SearchStrategy.VECTOR_ONLY: Searches only by vector similarity.
|
||||
- SearchStrategy.TEXT_ONLY: Searches only by text similarity. This
|
||||
option is only available if use_full_text_search is True.
|
||||
- SearchStrategy.FILTER_BY_TEXT: Filters by text similarity and
|
||||
searches by vector similarity. This option is only available if
|
||||
use_full_text_search is True.
|
||||
- SearchStrategy.FILTER_BY_VECTOR: Filters by vector similarity and
|
||||
searches by text similarity. This option is only available if
|
||||
use_full_text_search is True.
|
||||
- SearchStrategy.WEIGHTED_SUM: Searches by a weighted sum of text and
|
||||
vector similarity. This option is only available if
|
||||
use_full_text_search is True and distance_strategy is DOT_PRODUCT.
|
||||
filter_threshold (float): The threshold for filtering by text or vector
|
||||
similarity. Default is 1. This option has effect only if search_strategy
|
||||
is SearchStrategy.FILTER_BY_TEXT or SearchStrategy.FILTER_BY_VECTOR.
|
||||
text_weight (float): The weight of text similarity in the weighted sum
|
||||
search strategy. Default is 0.5. This option has effect only if
|
||||
search_strategy is SearchStrategy.WEIGHTED_SUM.
|
||||
vector_weight (float): The weight of vector similarity in the weighted sum
|
||||
search strategy. Default is 0.5. This option has effect only if
|
||||
search_strategy is SearchStrategy.WEIGHTED_SUM.
|
||||
vector_select_count_multiplier (int): The multiplier for the number of
|
||||
vectors to select when using the vector index. Default is 10.
|
||||
This parameter has effect only if use_vector_index is True and
|
||||
search_strategy is SearchStrategy.WEIGHTED_SUM or
|
||||
SearchStrategy.FILTER_BY_TEXT.
|
||||
The number of vectors selected will
|
||||
be k * vector_select_count_multiplier.
|
||||
This is needed due to the limitations of the vector index.
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
document.
|
||||
|
||||
Raises:
|
||||
ValueError: If the search strategy is not supported with the
|
||||
distance strategy.
|
||||
|
||||
Examples:
|
||||
Basic Usage:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
s2 = SingleStoreDB.from_documents(
|
||||
docs,
|
||||
OpenAIEmbeddings(),
|
||||
host="username:password@localhost:3306/database"
|
||||
)
|
||||
results = s2.similarity_search_with_score("query text", 1,
|
||||
{"metadata_field": "metadata_value"})
|
||||
|
||||
Different Search Strategies:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
s2 = SingleStoreDB.from_documents(
|
||||
docs,
|
||||
OpenAIEmbeddings(),
|
||||
host="username:password@localhost:3306/database",
|
||||
use_full_text_search=True,
|
||||
use_vector_index=True,
|
||||
)
|
||||
results = s2.similarity_search_with_score("query text", 1,
|
||||
search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,
|
||||
filter_threshold=0.5)
|
||||
|
||||
Weighted Sum Search Strategy:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
s2 = SingleStoreDB.from_documents(
|
||||
docs,
|
||||
OpenAIEmbeddings(),
|
||||
host="username:password@localhost:3306/database",
|
||||
use_full_text_search=True,
|
||||
use_vector_index=True,
|
||||
)
|
||||
results = s2.similarity_search_with_score("query text", 1,
|
||||
search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,
|
||||
text_weight=0.3,
|
||||
vector_weight=0.7)
|
||||
"""
|
||||
|
||||
if (
|
||||
search_strategy != SingleStoreDB.SearchStrategy.VECTOR_ONLY
|
||||
and not self.use_full_text_search
|
||||
):
|
||||
raise ValueError(
|
||||
"""Search strategy {} is not supported
|
||||
when use_full_text_search is False""".format(search_strategy)
|
||||
)
|
||||
|
||||
if (
|
||||
search_strategy == SingleStoreDB.SearchStrategy.WEIGHTED_SUM
|
||||
and self.distance_strategy != DistanceStrategy.DOT_PRODUCT
|
||||
):
|
||||
raise ValueError(
|
||||
"Search strategy {} is not supported with distance strategy {}".format(
|
||||
search_strategy, self.distance_strategy
|
||||
)
|
||||
)
|
||||
|
||||
# Creates embedding vector from user query
|
||||
embedding = self.embedding.embed_query(query)
|
||||
embedding = []
|
||||
if search_strategy != SingleStoreDB.SearchStrategy.TEXT_ONLY:
|
||||
embedding = self.embedding.embed_query(query)
|
||||
|
||||
self.embedding.embed_query(query)
|
||||
conn = self.connection_pool.connect()
|
||||
result = []
|
||||
where_clause: str = ""
|
||||
where_clause_values: List[Any] = []
|
||||
if filter:
|
||||
if filter or search_strategy in [
|
||||
SingleStoreDB.SearchStrategy.FILTER_BY_TEXT,
|
||||
SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,
|
||||
]:
|
||||
where_clause = "WHERE "
|
||||
arguments = []
|
||||
|
||||
if search_strategy == SingleStoreDB.SearchStrategy.FILTER_BY_TEXT:
|
||||
arguments.append(
|
||||
"MATCH ({}) AGAINST (%s) > %s".format(self.content_field)
|
||||
)
|
||||
where_clause_values.append(query)
|
||||
where_clause_values.append(float(filter_threshold))
|
||||
|
||||
if search_strategy == SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR:
|
||||
condition = "{}({}, JSON_ARRAY_PACK(%s)) ".format(
|
||||
self.distance_strategy.name
|
||||
if isinstance(self.distance_strategy, DistanceStrategy)
|
||||
else self.distance_strategy,
|
||||
self.vector_field,
|
||||
)
|
||||
if self.distance_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE:
|
||||
condition += "< %s"
|
||||
else:
|
||||
condition += "> %s"
|
||||
arguments.append(condition)
|
||||
where_clause_values.append("[{}]".format(",".join(map(str, embedding))))
|
||||
where_clause_values.append(float(filter_threshold))
|
||||
|
||||
def build_where_clause(
|
||||
where_clause_values: List[Any],
|
||||
sub_filter: dict,
|
||||
@@ -459,29 +744,98 @@ class SingleStoreDB(VectorStore):
|
||||
where_clause_values += prefix_args + [key]
|
||||
where_clause_values.append(json.dumps(sub_filter[key]))
|
||||
|
||||
build_where_clause(where_clause_values, filter)
|
||||
if filter:
|
||||
build_where_clause(where_clause_values, filter)
|
||||
where_clause += " AND ".join(arguments)
|
||||
|
||||
try:
|
||||
cur = conn.cursor()
|
||||
try:
|
||||
cur.execute(
|
||||
"""SELECT {}, {}, {}({}, JSON_ARRAY_PACK(%s)) as __score
|
||||
FROM {} {} ORDER BY __score {} LIMIT %s""".format(
|
||||
self.content_field,
|
||||
self.metadata_field,
|
||||
self.distance_strategy.name
|
||||
if isinstance(self.distance_strategy, DistanceStrategy)
|
||||
else self.distance_strategy,
|
||||
self.vector_field,
|
||||
self.table_name,
|
||||
where_clause,
|
||||
ORDERING_DIRECTIVE[self.distance_strategy],
|
||||
),
|
||||
("[{}]".format(",".join(map(str, embedding))),)
|
||||
+ tuple(where_clause_values)
|
||||
+ (k,),
|
||||
)
|
||||
if (
|
||||
search_strategy == SingleStoreDB.SearchStrategy.VECTOR_ONLY
|
||||
or search_strategy == SingleStoreDB.SearchStrategy.FILTER_BY_TEXT
|
||||
):
|
||||
search_options = ""
|
||||
if (
|
||||
self.use_vector_index
|
||||
and search_strategy
|
||||
== SingleStoreDB.SearchStrategy.FILTER_BY_TEXT
|
||||
):
|
||||
search_options = "SEARCH_OPTIONS '{\"k\":%d}'" % (
|
||||
k * vector_select_count_multiplier
|
||||
)
|
||||
cur.execute(
|
||||
"""SELECT {}, {}, {}({}, JSON_ARRAY_PACK(%s)) as __score
|
||||
FROM {} {} ORDER BY __score {}{} LIMIT %s""".format(
|
||||
self.content_field,
|
||||
self.metadata_field,
|
||||
self.distance_strategy.name
|
||||
if isinstance(self.distance_strategy, DistanceStrategy)
|
||||
else self.distance_strategy,
|
||||
self.vector_field,
|
||||
self.table_name,
|
||||
where_clause,
|
||||
search_options,
|
||||
ORDERING_DIRECTIVE[self.distance_strategy],
|
||||
),
|
||||
("[{}]".format(",".join(map(str, embedding))),)
|
||||
+ tuple(where_clause_values)
|
||||
+ (k,),
|
||||
)
|
||||
elif (
|
||||
search_strategy == SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR
|
||||
or search_strategy == SingleStoreDB.SearchStrategy.TEXT_ONLY
|
||||
):
|
||||
cur.execute(
|
||||
"""SELECT {}, {}, MATCH ({}) AGAINST (%s) as __score
|
||||
FROM {} {} ORDER BY __score DESC LIMIT %s""".format(
|
||||
self.content_field,
|
||||
self.metadata_field,
|
||||
self.content_field,
|
||||
self.table_name,
|
||||
where_clause,
|
||||
),
|
||||
(query,) + tuple(where_clause_values) + (k,),
|
||||
)
|
||||
elif search_strategy == SingleStoreDB.SearchStrategy.WEIGHTED_SUM:
|
||||
cur.execute(
|
||||
"""SELECT {}, {}, __score1 * %s + __score2 * %s as __score
|
||||
FROM (
|
||||
SELECT {}, {}, {}, MATCH ({}) AGAINST (%s) as __score1
|
||||
FROM {} {}) r1 FULL OUTER JOIN (
|
||||
SELECT {}, {}({}, JSON_ARRAY_PACK(%s)) as __score2
|
||||
FROM {} {} ORDER BY __score2 {} LIMIT %s
|
||||
) r2 ON r1.{} = r2.{} ORDER BY __score {} LIMIT %s""".format(
|
||||
self.content_field,
|
||||
self.metadata_field,
|
||||
self.id_field,
|
||||
self.content_field,
|
||||
self.metadata_field,
|
||||
self.content_field,
|
||||
self.table_name,
|
||||
where_clause,
|
||||
self.id_field,
|
||||
self.distance_strategy.name
|
||||
if isinstance(self.distance_strategy, DistanceStrategy)
|
||||
else self.distance_strategy,
|
||||
self.vector_field,
|
||||
self.table_name,
|
||||
where_clause,
|
||||
ORDERING_DIRECTIVE[self.distance_strategy],
|
||||
self.id_field,
|
||||
self.id_field,
|
||||
ORDERING_DIRECTIVE[self.distance_strategy],
|
||||
),
|
||||
(text_weight, vector_weight, query)
|
||||
+ tuple(where_clause_values)
|
||||
+ ("[{}]".format(",".join(map(str, embedding))),)
|
||||
+ tuple(where_clause_values)
|
||||
+ (k * vector_select_count_multiplier, k),
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Invalid search strategy: {}".format(search_strategy)
|
||||
)
|
||||
|
||||
for row in cur.fetchall():
|
||||
doc = Document(page_content=row[0], metadata=row[1])
|
||||
@@ -503,10 +857,12 @@ class SingleStoreDB(VectorStore):
|
||||
content_field: str = "content",
|
||||
metadata_field: str = "metadata",
|
||||
vector_field: str = "vector",
|
||||
id_field: str = "id",
|
||||
use_vector_index: bool = False,
|
||||
vector_index_name: str = "",
|
||||
vector_index_options: Optional[dict] = None,
|
||||
vector_size: int = 1536,
|
||||
use_full_text_search: bool = False,
|
||||
pool_size: int = 5,
|
||||
max_overflow: int = 10,
|
||||
timeout: float = 30,
|
||||
@@ -518,10 +874,99 @@ class SingleStoreDB(VectorStore):
|
||||
2. Creates a new table for the embeddings in SingleStoreDB.
|
||||
3. Adds the documents to the newly created table.
|
||||
This is intended to be a quick way to get started.
|
||||
Args:
|
||||
texts (List[str]): List of texts to add to the vectorstore.
|
||||
embedding (Embeddings): A text embedding model.
|
||||
metadatas (Optional[List[dict]], optional): Optional list of metadatas.
|
||||
Defaults to None.
|
||||
distance_strategy (DistanceStrategy, optional):
|
||||
Determines the strategy employed for calculating
|
||||
the distance between vectors in the embedding space.
|
||||
Defaults to DOT_PRODUCT.
|
||||
Available options are:
|
||||
- DOT_PRODUCT: Computes the scalar product of two vectors.
|
||||
This is the default behavior
|
||||
- EUCLIDEAN_DISTANCE: Computes the Euclidean distance between
|
||||
two vectors. This metric considers the geometric distance in
|
||||
the vector space, and might be more suitable for embeddings
|
||||
that rely on spatial relationships. This metric is not
|
||||
compatible with the WEIGHTED_SUM search strategy.
|
||||
table_name (str, optional): Specifies the name of the table in use.
|
||||
Defaults to "embeddings".
|
||||
content_field (str, optional): Specifies the field to store the content.
|
||||
Defaults to "content".
|
||||
metadata_field (str, optional): Specifies the field to store metadata.
|
||||
Defaults to "metadata".
|
||||
vector_field (str, optional): Specifies the field to store the vector.
|
||||
Defaults to "vector".
|
||||
id_field (str, optional): Specifies the field to store the id.
|
||||
Defaults to "id".
|
||||
use_vector_index (bool, optional): Toggles the use of a vector index.
|
||||
Works only with SingleStoreDB 8.5 or later. Defaults to False.
|
||||
If set to True, vector_size parameter is required to be set to
|
||||
a proper value.
|
||||
vector_index_name (str, optional): Specifies the name of the vector index.
|
||||
Defaults to empty. Will be ignored if use_vector_index is set to False.
|
||||
vector_index_options (dict, optional): Specifies the options for
|
||||
the vector index. Defaults to {}.
|
||||
Will be ignored if use_vector_index is set to False. The options are:
|
||||
index_type (str, optional): Specifies the type of the index.
|
||||
Defaults to IVF_PQFS.
|
||||
For more options, please refer to the SingleStoreDB documentation:
|
||||
https://docs.singlestore.com/cloud/reference/sql-reference/vector-functions/vector-indexing/
|
||||
vector_size (int, optional): Specifies the size of the vector.
|
||||
Defaults to 1536. Required if use_vector_index is set to True.
|
||||
Should be set to the same value as the size of the vectors
|
||||
stored in the vector_field.
|
||||
use_full_text_search (bool, optional): Toggles the use of a full-text index
|
||||
on the document content. Defaults to False. If set to True, the table
|
||||
will be created with a full-text index on the content field,
|
||||
and the similarity_search method will allow using TEXT_ONLY,
|
||||
FILTER_BY_TEXT, FILTER_BY_VECTOR, and WEIGHTED_SUM search strategies.
|
||||
If set to False, the similarity_search method will only allow
|
||||
VECTOR_ONLY search strategy.
|
||||
|
||||
pool_size (int, optional): Determines the number of active connections in
|
||||
the pool. Defaults to 5.
|
||||
max_overflow (int, optional): Determines the maximum number of connections
|
||||
allowed beyond the pool_size. Defaults to 10.
|
||||
timeout (float, optional): Specifies the maximum wait time in seconds for
|
||||
establishing a connection. Defaults to 30.
|
||||
|
||||
Additional optional arguments provide further customization over the
|
||||
database connection:
|
||||
|
||||
pure_python (bool, optional): Toggles the connector mode. If True,
|
||||
operates in pure Python mode.
|
||||
local_infile (bool, optional): Allows local file uploads.
|
||||
charset (str, optional): Specifies the character set for string values.
|
||||
ssl_key (str, optional): Specifies the path of the file containing the SSL
|
||||
key.
|
||||
ssl_cert (str, optional): Specifies the path of the file containing the SSL
|
||||
certificate.
|
||||
ssl_ca (str, optional): Specifies the path of the file containing the SSL
|
||||
certificate authority.
|
||||
ssl_cipher (str, optional): Sets the SSL cipher list.
|
||||
ssl_disabled (bool, optional): Disables SSL usage.
|
||||
ssl_verify_cert (bool, optional): Verifies the server's certificate.
|
||||
Automatically enabled if ``ssl_ca`` is specified.
|
||||
ssl_verify_identity (bool, optional): Verifies the server's identity.
|
||||
conv (dict[int, Callable], optional): A dictionary of data conversion
|
||||
functions.
|
||||
credential_type (str, optional): Specifies the type of authentication to
|
||||
use: auth.PASSWORD, auth.JWT, or auth.BROWSER_SSO.
|
||||
autocommit (bool, optional): Enables autocommits.
|
||||
results_type (str, optional): Determines the structure of the query results:
|
||||
tuples, namedtuples, dicts.
|
||||
results_format (str, optional): Deprecated. This option has been renamed to
|
||||
results_type.
|
||||
|
||||
Example:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.vectorstores import SingleStoreDB
|
||||
from langchain_community.embeddings import OpenAIEmbeddings
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
|
||||
s2 = SingleStoreDB.from_texts(
|
||||
texts,
|
||||
OpenAIEmbeddings(),
|
||||
@@ -536,6 +981,7 @@ class SingleStoreDB(VectorStore):
|
||||
content_field=content_field,
|
||||
metadata_field=metadata_field,
|
||||
vector_field=vector_field,
|
||||
id_field=id_field,
|
||||
pool_size=pool_size,
|
||||
max_overflow=max_overflow,
|
||||
timeout=timeout,
|
||||
@@ -543,6 +989,7 @@ class SingleStoreDB(VectorStore):
|
||||
vector_index_name=vector_index_name,
|
||||
vector_index_options=vector_index_options,
|
||||
vector_size=vector_size,
|
||||
use_full_text_search=use_full_text_search,
|
||||
**kwargs,
|
||||
)
|
||||
instance.add_texts(texts, metadatas, embedding.embed_documents(texts), **kwargs)
|
||||
|
@@ -1,4 +1,5 @@
|
||||
"""Test SingleStoreDB functionality."""
|
||||
import math
|
||||
import os
|
||||
import tempfile
|
||||
from typing import List
|
||||
@@ -67,11 +68,76 @@ class RandomEmbeddings(Embeddings):
|
||||
return [np.random.rand(100).tolist() for _ in uris]
|
||||
|
||||
|
||||
class IncrementalEmbeddings(Embeddings):
    """Deterministic fake embeddings that step around the unit circle.

    Each embedded text advances an internal counter by one and yields the
    two-dimensional vector ``[cos(a), sin(a)]`` with ``a = counter * pi / 10``.
    For testing purposes.
    """

    def __init__(self) -> None:
        self.counter = 0

    def set_counter(self, counter: int) -> None:
        """Set the internal step counter to ``counter``."""
        self.counter = counter

    def embed_query(self, text: str) -> List[float]:
        """Advance the counter and return the vector for the new step.

        The content of ``text`` is not used; only the call order matters.
        """
        self.counter += 1
        angle = self.counter * math.pi / 10
        return [math.cos(angle), math.sin(angle)]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed every text in order, consuming one counter step per text."""
        vectors: List[List[float]] = []
        for text in texts:
            vectors.append(self.embed_query(text))
        return vectors
|
||||
|
||||
|
||||
@pytest.fixture
def texts() -> List[str]:
    """Provide three short sample strings for the tests."""
    sample_texts = ["foo", "bar", "baz"]
    return sample_texts
|
||||
|
||||
|
||||
@pytest.fixture
def snow_rain_docs() -> List[Document]:
    """Provide six sample documents: three about rain and three about snow.

    Every document carries string-valued ``count``, ``category`` and ``group``
    metadata entries.
    """
    return [
        Document(
            page_content="""In the parched desert, a sudden rainstorm brought relief,
as the droplets danced upon the thirsty earth, rejuvenating the landscape
with the sweet scent of petrichor.""",
            metadata={"count": "1", "category": "rain", "group": "a"},
        ),
        Document(
            page_content="""Amidst the bustling cityscape, the rain fell relentlessly,
creating a symphony of pitter-patter on the pavement, while umbrellas
bloomed like colorful flowers in a sea of gray.""",
            metadata={"count": "2", "category": "rain", "group": "a"},
        ),
        Document(
            page_content="""High in the mountains, the rain transformed into a delicate
mist, enveloping the peaks in a mystical veil, where each droplet seemed to
whisper secrets to the ancient rocks below.""",
            metadata={"count": "3", "category": "rain", "group": "b"},
        ),
        Document(
            page_content="""Blanketing the countryside in a soft, pristine layer, the
snowfall painted a serene tableau, muffling the world in a tranquil hush
as delicate flakes settled upon the branches of trees like nature's own
lacework.""",
            metadata={"count": "1", "category": "snow", "group": "b"},
        ),
        Document(
            page_content="""In the urban landscape, snow descended, transforming
bustling streets into a winter wonderland, where the laughter of
children echoed amidst the flurry of snowballs and the twinkle of
holiday lights.""",
            metadata={"count": "2", "category": "snow", "group": "a"},
        ),
        Document(
            page_content="""Atop the rugged peaks, snow fell with an unyielding
intensity, sculpting the landscape into a pristine alpine paradise,
where the frozen crystals shimmered under the moonlight, casting a
spell of enchantment over the wilderness below.""",
            metadata={"count": "3", "category": "snow", "group": "a"},
        ),
    ]
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
|
||||
def test_singlestoredb(texts: List[str]) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
@@ -505,3 +571,184 @@ def test_singestoredb_add_image2() -> None:
|
||||
output = docsearch.similarity_search("horse", k=1)
|
||||
assert "horse" in output[0].page_content
|
||||
drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_text_only_search(snow_rain_docs: List[Document]) -> None:
    """Check TEXT_ONLY search, with and without a metadata filter."""
    table_name = "test_singlestoredb_text_only_search"
    drop(table_name)
    store = SingleStoreDB(
        RandomEmbeddings(),
        table_name=table_name,
        use_full_text_search=True,
        host=TEST_SINGLESTOREDB_URL,
    )
    store.add_documents(snow_rain_docs)
    text_only = SingleStoreDB.SearchStrategy.TEXT_ONLY

    # Filtered query: restricted to documents whose metadata has count == "1".
    filtered_hits = store.similarity_search(
        "rainstorm in parched desert",
        k=3,
        filter={"count": "1"},
        search_strategy=text_only,
    )
    assert len(filtered_hits) == 2
    first_hit, second_hit = filtered_hits[0], filtered_hits[1]
    assert (
        "In the parched desert, a sudden rainstorm brought relief,"
        in first_hit.page_content
    )
    assert "Blanketing the countryside in a soft, pristine layer" in (
        second_hit.page_content
    )

    # Unfiltered query: k results are expected, best text match first.
    unfiltered_hits = store.similarity_search(
        "snowfall in countryside",
        k=3,
        search_strategy=text_only,
    )
    assert len(unfiltered_hits) == 3
    assert (
        "Blanketing the countryside in a soft, pristine layer,"
        in unfiltered_hits[0].page_content
    )
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_filter_by_text_search(snow_rain_docs: List[Document]) -> None:
    """Check FILTER_BY_TEXT search with a filter threshold of 0."""
    table_name = "test_singlestoredb_filter_by_text_search"
    drop(table_name)
    store = SingleStoreDB.from_documents(
        snow_rain_docs,
        IncrementalEmbeddings(),
        table_name=table_name,
        use_full_text_search=True,
        use_vector_index=True,
        vector_size=2,
        host=TEST_SINGLESTOREDB_URL,
    )
    hits = store.similarity_search(
        "rainstorm in parched desert",
        k=1,
        search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_TEXT,
        filter_threshold=0,
    )
    assert len(hits) == 1
    top_hit = hits[0]
    assert (
        "In the parched desert, a sudden rainstorm brought relief"
        in top_hit.page_content
    )
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_filter_by_vector_search1(snow_rain_docs: List[Document]) -> None:
    """Check FILTER_BY_VECTOR search restricted to the "rain" category."""
    table_name = "test_singlestoredb_filter_by_vector_search1"
    drop(table_name)
    store = SingleStoreDB.from_documents(
        snow_rain_docs,
        IncrementalEmbeddings(),
        table_name=table_name,
        use_full_text_search=True,
        use_vector_index=True,
        vector_size=2,
        host=TEST_SINGLESTOREDB_URL,
    )
    hits = store.similarity_search(
        "rainstorm in parched desert, rain",
        k=1,
        filter={"category": "rain"},
        search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,
        filter_threshold=-0.2,
    )
    assert len(hits) == 1
    top_hit = hits[0]
    assert (
        "High in the mountains, the rain transformed into a delicate"
        in top_hit.page_content
    )
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_filter_by_vector_search2(snow_rain_docs: List[Document]) -> None:
    """Check FILTER_BY_VECTOR search on a store using Euclidean distance."""
    table_name = "test_singlestoredb_filter_by_vector_search2"
    drop(table_name)
    store = SingleStoreDB.from_documents(
        snow_rain_docs,
        IncrementalEmbeddings(),
        distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,
        table_name=table_name,
        use_full_text_search=True,
        use_vector_index=True,
        vector_size=2,
        host=TEST_SINGLESTOREDB_URL,
    )
    hits = store.similarity_search(
        "rainstorm in parched desert, rain",
        k=1,
        filter={"group": "a"},
        search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,
        filter_threshold=1.5,
    )
    assert len(hits) == 1
    top_hit = hits[0]
    assert (
        "Amidst the bustling cityscape, the rain fell relentlessly"
        in top_hit.page_content
    )
    drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_weighted_sum_search_unsupported_strategy(
    snow_rain_docs: List[Document],
) -> None:
    """Expect a ValueError for WEIGHTED_SUM search on a Euclidean-distance store."""
    table_name = "test_singlestoredb_waighted_sum_search_unsupported_strategy"
    drop(table_name)
    embeddings = IncrementalEmbeddings()
    docsearch = SingleStoreDB.from_documents(
        snow_rain_docs,
        embeddings,
        table_name=table_name,
        use_full_text_search=True,
        use_vector_index=True,
        vector_size=2,
        host=TEST_SINGLESTOREDB_URL,
        distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,
    )
    try:
        # pytest.raises fails the test when no exception is raised; a bare
        # try/except with the assert inside the handler would pass silently.
        with pytest.raises(ValueError, match="Search strategy WEIGHTED_SUM is not"):
            docsearch.similarity_search(
                "rainstorm in parched desert, rain",
                k=1,
                search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,
            )
    finally:
        # Drop the table even when the expectation above is not met.
        drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_weighted_sum_search(snow_rain_docs: List[Document]) -> None:
    """Check WEIGHTED_SUM search restricted to the "snow" category."""
    table_name = "test_singlestoredb_waighted_sum_search"
    drop(table_name)
    store = SingleStoreDB.from_documents(
        snow_rain_docs,
        IncrementalEmbeddings(),
        table_name=table_name,
        use_full_text_search=True,
        use_vector_index=True,
        vector_size=2,
        host=TEST_SINGLESTOREDB_URL,
    )
    hits = store.similarity_search(
        "rainstorm in parched desert, rain",
        k=1,
        search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,
        filter={"category": "snow"},
    )
    assert len(hits) == 1
    top_hit = hits[0]
    assert "Atop the rugged peaks, snow fell with an unyielding" in top_hit.page_content
    drop(table_name)
|
||||
|
Reference in New Issue
Block a user