Mirror of https://github.com/hwchase17/langchain.git (synced 2025-07-04 12:18:24 +00:00)
Added SingleStoreDB Vector Store (#5619)

- Added the `SingleStoreDB` vector store, a wrapper over the SingleStore DB database that can be used as vector storage and supports efficient similarity search.
- Added integration tests for the vector store.
- Added a Jupyter notebook with an example.

@dev2049

---------

Co-authored-by: Volodymyr Tkachuk <vtkachuk-ua@singlestore.com>
Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent 78aa59c68b
commit a1549901ce
docs/modules/indexes/vectorstores/examples/singlestoredb.ipynb (new file, 139 lines)
@@ -0,0 +1,139 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2b9582dc",
   "metadata": {},
   "source": [
    "# SingleStoreDB vector search\n",
    "[SingleStore DB](https://singlestore.com) is a high-performance distributed database that supports deployment both in the [cloud](https://www.singlestore.com/cloud/) and on-premises. It has long supported vector functions such as [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html), which makes it a good fit for AI applications that require text similarity matching.\n",
    "This tutorial illustrates how to use the SingleStore DB vector store."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e4a61a4d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Establishing a connection to the database is facilitated through the singlestoredb Python connector.\n",
    "# Please ensure that this connector is installed in your working environment.\n",
    "!pip install singlestoredb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39a0132a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import getpass\n",
    "\n",
    "# We want to use OpenAIEmbeddings, so we have to get the OpenAI API key.\n",
    "os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6104fde8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain.vectorstores import SingleStoreDB\n",
    "from langchain.document_loaders import TextLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b45113c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load text samples\n",
    "loader = TextLoader('../../../state_of_the_union.txt')\n",
    "documents = loader.load()\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "docs = text_splitter.split_documents(documents)\n",
    "\n",
    "embeddings = OpenAIEmbeddings()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "535b2687",
   "metadata": {},
   "source": [
    "There are several ways to establish a [connection](https://singlestoredb-python.labs.singlestore.com/generated/singlestoredb.connect.html) to the database. You can either set up environment variables or pass named parameters to the `SingleStoreDB` constructor. Alternatively, you may provide these parameters to the `from_documents` and `from_texts` methods."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0b316bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up the connection URL as an environment variable\n",
    "os.environ['SINGLESTOREDB_URL'] = 'root:pass@localhost:3306/db'\n",
    "\n",
    "# Load documents into the store\n",
    "docsearch = SingleStoreDB.from_documents(\n",
    "    docs,\n",
    "    embeddings,\n",
    "    table_name=\"notebook\",  # use a table with a custom name\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0eaa4297",
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "docs = docsearch.similarity_search(query)  # Find documents that correspond to the query\n",
    "print(docs[0].page_content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86efff90",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
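The store built in the notebook can also serve as a retriever through the `as_retriever` method added in this commit. A minimal sketch, reusing the `docsearch` object from the cells above (the value of k is illustrative):

    # SingleStoreDBRetriever is a pydantic model, so k can be set at construction time.
    retriever = docsearch.as_retriever(k=2)
    docs = retriever.get_relevant_documents(
        "What did the president say about Ketanji Brown Jackson"
    )
    print(docs[0].page_content)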
langchain/vectorstores/__init__.py
@@ -18,6 +18,7 @@ from langchain.vectorstores.opensearch_vector_search import OpenSearchVectorSearch
 from langchain.vectorstores.pinecone import Pinecone
 from langchain.vectorstores.qdrant import Qdrant
 from langchain.vectorstores.redis import Redis
+from langchain.vectorstores.singlestoredb import SingleStoreDB
 from langchain.vectorstores.sklearn import SKLearnVectorStore
 from langchain.vectorstores.supabase import SupabaseVectorStore
 from langchain.vectorstores.tair import Tair
@@ -37,6 +38,7 @@ __all__ = [
     "Qdrant",
     "Milvus",
     "Zilliz",
+    "SingleStoreDB",
     "Chroma",
     "OpenSearchVectorSearch",
     "AtlasDB",
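With the export added above, the class resolves from the package root, which is exactly the import path the notebook uses:

    from langchain.vectorstores import SingleStoreDB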
langchain/vectorstores/base.py
@@ -5,7 +5,18 @@ import asyncio
 import warnings
 from abc import ABC, abstractmethod
 from functools import partial
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, TypeVar
+from typing import (
+    Any,
+    ClassVar,
+    Collection,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+)

 from pydantic import BaseModel, Field, root_validator

@@ -347,6 +358,11 @@ class VectorStoreRetriever(BaseRetriever, BaseModel):
     vectorstore: VectorStore
     search_type: str = "similarity"
     search_kwargs: dict = Field(default_factory=dict)
+    allowed_search_types: ClassVar[Collection[str]] = (
+        "similarity",
+        "similarity_score_threshold",
+        "mmr",
+    )

     class Config:
         """Configuration for this pydantic object."""
@@ -356,15 +372,15 @@ class VectorStoreRetriever(BaseRetriever, BaseModel):
     @root_validator()
     def validate_search_type(cls, values: Dict) -> Dict:
         """Validate search type."""
         if "search_type" in values:
             search_type = values["search_type"]
-            if search_type not in ("similarity", "similarity_score_threshold", "mmr"):
-                raise ValueError(f"search_type of {search_type} not allowed.")
+            if search_type not in cls.allowed_search_types:
+                raise ValueError(
+                    f"search_type of {search_type} not allowed. Valid values are: "
+                    f"{cls.allowed_search_types}"
+                )
             if search_type == "similarity_score_threshold":
                 score_threshold = values["search_kwargs"].get("score_threshold")
-                if (score_threshold is None) or (
-                    not isinstance(score_threshold, float)
-                ):
+                if (score_threshold is None) or (not isinstance(score_threshold, float)):
                     raise ValueError(
                         "`score_threshold` is not specified with a float value(0~1) "
                         "in `search_kwargs`."
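The refactor above replaces the hard-coded tuple of search types with an overridable `allowed_search_types` ClassVar, so retriever subclasses can narrow what they accept without reimplementing the validator. A sketch of the pattern (`SimilarityOnlyRetriever` and `store` are hypothetical; the new `SingleStoreDBRetriever` below applies the same trick with `("similarity",)`):

    from typing import ClassVar, Collection

    class SimilarityOnlyRetriever(VectorStoreRetriever):
        # Narrowed set consulted by the shared root_validator via
        # cls.allowed_search_types.
        allowed_search_types: ClassVar[Collection[str]] = ("similarity",)

    # Raises: ValueError: search_type of mmr not allowed.
    # Valid values are: ('similarity',)
    SimilarityOnlyRetriever(vectorstore=store, search_type="mmr")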
langchain/vectorstores/singlestoredb.py (new file, 372 lines)
@@ -0,0 +1,372 @@
"""Wrapper around SingleStore DB."""
from __future__ import annotations

import json
from typing import (
    Any,
    ClassVar,
    Collection,
    Iterable,
    List,
    Optional,
    Tuple,
    Type,
)

from sqlalchemy.pool import QueuePool

from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever


class SingleStoreDB(VectorStore):
    """
    This class serves as a Pythonic interface to the SingleStore DB database.
    The prerequisite for using this class is the installation of the ``singlestoredb``
    Python package.

    The SingleStoreDB vectorstore can be created by providing an embedding function and
    the relevant parameters for the database connection, connection pool, and
    optionally, the names of the table and the fields to use.
    """

    def _get_connection(self: SingleStoreDB) -> Any:
        try:
            import singlestoredb as s2
        except ImportError:
            raise ImportError(
                "Could not import singlestoredb python package. "
                "Please install it with `pip install singlestoredb`."
            )
        return s2.connect(**self.connection_kwargs)

    def __init__(
        self,
        embedding: Embeddings,
        *,
        table_name: str = "embeddings",
        content_field: str = "content",
        metadata_field: str = "metadata",
        vector_field: str = "vector",
        pool_size: int = 5,
        max_overflow: int = 10,
        timeout: float = 30,
        **kwargs: Any,
    ):
        """Initialize with necessary components.

        Args:
            embedding (Embeddings): A text embedding model.

            table_name (str, optional): Specifies the name of the table in use.
                Defaults to "embeddings".
            content_field (str, optional): Specifies the field to store the content.
                Defaults to "content".
            metadata_field (str, optional): Specifies the field to store metadata.
                Defaults to "metadata".
            vector_field (str, optional): Specifies the field to store the vector.
                Defaults to "vector".

            The following arguments pertain to the connection pool:

            pool_size (int, optional): Determines the number of active connections in
                the pool. Defaults to 5.
            max_overflow (int, optional): Determines the maximum number of connections
                allowed beyond the pool_size. Defaults to 10.
            timeout (float, optional): Specifies the maximum wait time in seconds for
                establishing a connection. Defaults to 30.

            The following arguments pertain to the database connection:

            host (str, optional): Specifies the hostname, IP address, or URL for the
                database connection. The default scheme is "mysql".
            user (str, optional): Database username.
            password (str, optional): Database password.
            port (int, optional): Database port. Defaults to 3306 for non-HTTP
                connections, 80 for HTTP connections, and 443 for HTTPS connections.
            database (str, optional): Database name.

            Additional optional arguments provide further customization over the
            database connection:

            pure_python (bool, optional): Toggles the connector mode. If True,
                operates in pure Python mode.
            local_infile (bool, optional): Allows local file uploads.
            charset (str, optional): Specifies the character set for string values.
            ssl_key (str, optional): Specifies the path of the file containing the SSL
                key.
            ssl_cert (str, optional): Specifies the path of the file containing the SSL
                certificate.
            ssl_ca (str, optional): Specifies the path of the file containing the SSL
                certificate authority.
            ssl_cipher (str, optional): Sets the SSL cipher list.
            ssl_disabled (bool, optional): Disables SSL usage.
            ssl_verify_cert (bool, optional): Verifies the server's certificate.
                Automatically enabled if ``ssl_ca`` is specified.
            ssl_verify_identity (bool, optional): Verifies the server's identity.
            conv (dict[int, Callable], optional): A dictionary of data conversion
                functions.
            credential_type (str, optional): Specifies the type of authentication to
                use: auth.PASSWORD, auth.JWT, or auth.BROWSER_SSO.
            autocommit (bool, optional): Enables autocommits.
            results_type (str, optional): Determines the structure of the query
                results: tuples, namedtuples, dicts.
            results_format (str, optional): Deprecated. This option has been renamed
                to results_type.

        Examples:
            Basic Usage:

            .. code-block:: python

                from langchain.embeddings import OpenAIEmbeddings
                from langchain.vectorstores import SingleStoreDB

                vectorstore = SingleStoreDB(
                    OpenAIEmbeddings(),
                    host="https://user:password@127.0.0.1:3306/database"
                )

            Advanced Usage:

            .. code-block:: python

                from langchain.embeddings import OpenAIEmbeddings
                from langchain.vectorstores import SingleStoreDB

                vectorstore = SingleStoreDB(
                    OpenAIEmbeddings(),
                    host="127.0.0.1",
                    port=3306,
                    user="user",
                    password="password",
                    database="db",
                    table_name="my_custom_table",
                    pool_size=10,
                    timeout=60,
                )

            Using environment variables:

            .. code-block:: python

                from langchain.embeddings import OpenAIEmbeddings
                from langchain.vectorstores import SingleStoreDB

                os.environ['SINGLESTOREDB_URL'] = 'me:p455w0rd@s2-host.com/my_db'
                vectorstore = SingleStoreDB(OpenAIEmbeddings())
        """
        self.embedding = embedding
        self.table_name = table_name
        self.content_field = content_field
        self.metadata_field = metadata_field
        self.vector_field = vector_field

        # Pass the rest of the kwargs to the connection.
        self.connection_kwargs = kwargs

        # Create the connection pool.
        self.connection_pool = QueuePool(
            self._get_connection,
            max_overflow=max_overflow,
            pool_size=pool_size,
            timeout=timeout,
        )
        self._create_table()

    def _create_table(self: SingleStoreDB) -> None:
        """Create table if it doesn't exist."""
        conn = self.connection_pool.connect()
        try:
            cur = conn.cursor()
            try:
                cur.execute(
                    """CREATE TABLE IF NOT EXISTS {}
                    ({} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
                    {} BLOB, {} JSON);""".format(
                        self.table_name,
                        self.content_field,
                        self.vector_field,
                        self.metadata_field,
                    ),
                )
            finally:
                cur.close()
        finally:
            conn.close()

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        embeddings: Optional[List[List[float]]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Add more texts to the vectorstore.

        Args:
            texts (Iterable[str]): Iterable of strings/text to add to the vectorstore.
            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
                Defaults to None.
            embeddings (Optional[List[List[float]]], optional): Optional pre-generated
                embeddings. Defaults to None.

        Returns:
            List[str]: empty list
        """
        conn = self.connection_pool.connect()
        try:
            cur = conn.cursor()
            try:
                # Write data to the SingleStore DB table.
                for i, text in enumerate(texts):
                    # Use provided values by default, or fall back to computed ones.
                    metadata = metadatas[i] if metadatas else {}
                    embedding = (
                        embeddings[i]
                        if embeddings
                        else self.embedding.embed_documents([text])[0]
                    )
                    cur.execute(
                        "INSERT INTO {} VALUES (%s, JSON_ARRAY_PACK(%s), %s)".format(
                            self.table_name
                        ),
                        (
                            text,
                            "[{}]".format(",".join(map(str, embedding))),
                            json.dumps(metadata),
                        ),
                    )
            finally:
                cur.close()
        finally:
            conn.close()
        return []

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Returns the most similar indexed documents to the query text.

        Uses cosine similarity.

        Args:
            query (str): The query text for which to find similar documents.
            k (int): The number of documents to return. Default is 4.

        Returns:
            List[Document]: A list of documents that are most similar to the query text.
        """
        docs_and_scores = self.similarity_search_with_score(query, k=k)
        return [doc for doc, _ in docs_and_scores]

    def similarity_search_with_score(
        self, query: str, k: int = 4
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query. Uses cosine similarity.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.

        Returns:
            List of Documents most similar to the query, with a score for each.
        """
        # Create the embedding vector from the user query.
        embedding = self.embedding.embed_query(query)
        conn = self.connection_pool.connect()
        result = []
        try:
            cur = conn.cursor()
            try:
                cur.execute(
                    """SELECT {}, {}, DOT_PRODUCT({}, JSON_ARRAY_PACK(%s)) as __score
                    FROM {} ORDER BY __score DESC LIMIT %s""".format(
                        self.content_field,
                        self.metadata_field,
                        self.vector_field,
                        self.table_name,
                    ),
                    (
                        "[{}]".format(",".join(map(str, embedding))),
                        k,
                    ),
                )

                for row in cur.fetchall():
                    doc = Document(page_content=row[0], metadata=row[1])
                    result.append((doc, float(row[2])))
            finally:
                cur.close()
        finally:
            conn.close()
        return result

    @classmethod
    def from_texts(
        cls: Type[SingleStoreDB],
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        table_name: str = "embeddings",
        content_field: str = "content",
        metadata_field: str = "metadata",
        vector_field: str = "vector",
        pool_size: int = 5,
        max_overflow: int = 10,
        timeout: float = 30,
        **kwargs: Any,
    ) -> SingleStoreDB:
        """Create a SingleStoreDB vectorstore from raw documents.

        This is a user-friendly interface that:
            1. Embeds documents.
            2. Creates a new table for the embeddings in SingleStoreDB.
            3. Adds the documents to the newly created table.

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from langchain.vectorstores import SingleStoreDB
                from langchain.embeddings import OpenAIEmbeddings

                s2 = SingleStoreDB.from_texts(
                    texts,
                    OpenAIEmbeddings(),
                    host="username:password@localhost:3306/database"
                )
        """
        instance = cls(
            embedding,
            table_name=table_name,
            content_field=content_field,
            metadata_field=metadata_field,
            vector_field=vector_field,
            pool_size=pool_size,
            max_overflow=max_overflow,
            timeout=timeout,
            **kwargs,
        )
        instance.add_texts(texts, metadatas, embedding.embed_documents(texts), **kwargs)
        return instance

    def as_retriever(self, **kwargs: Any) -> SingleStoreDBRetriever:
        return SingleStoreDBRetriever(vectorstore=self, **kwargs)


class SingleStoreDBRetriever(VectorStoreRetriever):
    vectorstore: SingleStoreDB
    k: int = 4
    allowed_search_types: ClassVar[Collection[str]] = ("similarity",)

    def get_relevant_documents(self, query: str) -> List[Document]:
        if self.search_type == "similarity":
            docs = self.vectorstore.similarity_search(query, k=self.k)
        else:
            raise ValueError(f"search_type of {self.search_type} not allowed.")
        return docs

    async def aget_relevant_documents(self, query: str) -> List[Document]:
        raise NotImplementedError(
            "SingleStoreDBRetriever does not support async"
        )
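One detail worth calling out: `similarity_search_with_score` orders rows by `DOT_PRODUCT`, which coincides with cosine similarity only when the stored and query vectors have unit length. OpenAI embeddings are already normalized, and the integration tests below normalize their fake embeddings for the same reason. A small illustration of the invariant (not part of the commit):

    import numpy as np

    def normalize(vector):
        # Scale to unit length so that DOT_PRODUCT(a, b) equals the cosine
        # of the angle between a and b.
        norm = np.linalg.norm(vector)
        return [float(v / norm) for v in vector]

    a = normalize([1.0, 2.0, 2.0])
    b = normalize([2.0, 1.0, 2.0])
    cosine_similarity = float(np.dot(a, b))  # ~0.889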
poetry.lock (generated, 98 lines changed)
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry and should not be changed by hand.

 [[package]]
 name = "absl-py"
@@ -948,6 +948,30 @@ urllib3 = ">=1.25.4,<1.27"
 [package.extras]
 crt = ["awscrt (==0.16.9)"]

+[[package]]
+name = "build"
+version = "0.10.0"
+description = "A simple, correct Python build frontend"
+category = "main"
+optional = true
+python-versions = ">= 3.7"
+files = [
+    {file = "build-0.10.0-py3-none-any.whl", hash = "sha256:af266720050a66c893a6096a2f410989eeac74ff9a68ba194b3f6473e8e26171"},
+    {file = "build-0.10.0.tar.gz", hash = "sha256:d5b71264afdb5951d6704482aac78de887c80691c52b88a9ad195983ca2c9269"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "os_name == \"nt\""}
+packaging = ">=19.0"
+pyproject_hooks = "*"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+docs = ["furo (>=2021.08.31)", "sphinx (>=4.0,<5.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)"]
+test = ["filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0)", "setuptools (>=56.0.0)", "toml (>=0.10.0)", "wheel (>=0.36.0)"]
+typing = ["importlib-metadata (>=5.1)", "mypy (==0.991)", "tomli", "typing-extensions (>=3.7.4.3)"]
+virtualenv = ["virtualenv (>=20.0.35)"]
+
 [[package]]
 name = "cachetools"
 version = "5.3.1"
@@ -7409,6 +7433,21 @@ files = [
 doc = ["sphinx", "sphinx_rtd_theme"]
 test = ["flake8", "isort", "pytest"]

+[[package]]
+name = "pyproject-hooks"
+version = "1.0.0"
+description = "Wrappers to call pyproject.toml-based build backend hooks."
+category = "main"
+optional = true
+python-versions = ">=3.7"
+files = [
+    {file = "pyproject_hooks-1.0.0-py3-none-any.whl", hash = "sha256:283c11acd6b928d2f6a7c73fa0d01cb2bdc5f07c57a2eeb6e83d5e56b97976f8"},
+    {file = "pyproject_hooks-1.0.0.tar.gz", hash = "sha256:f271b298b97f5955d53fb12b72c1fb1948c22c1a6b70b315c54cedaca0264ef5"},
+]
+
+[package.dependencies]
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+
 [[package]]
 name = "pyreadline3"
 version = "3.4.1"
@@ -8603,6 +8642,39 @@ files = [
     {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
 ]

+[[package]]
+name = "singlestoredb"
+version = "0.6.1"
+description = "Interface to the SingleStore database and cluster management APIs"
+category = "main"
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "singlestoredb-0.6.1-cp36-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf1769e53993981420650a02c59ba367913d9f0256948cc98f6f9d464f74852a"},
+    {file = "singlestoredb-0.6.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4e90fa1dfde1e31f7abe011f75d9dc8cccbc35b968ed8381bd44c0b7dd4026b"},
+    {file = "singlestoredb-0.6.1-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44d361c3fa4de6228b525d0b1d22db75790d8e6fb84c3d0b2213bf41774d4323"},
+    {file = "singlestoredb-0.6.1-cp36-abi3-win32.whl", hash = "sha256:ad9543c41286a2095718ad7e133cc8b3b5de938f731157fbb2d4d2b0d1623aff"},
+    {file = "singlestoredb-0.6.1-cp36-abi3-win_amd64.whl", hash = "sha256:f9f9feda947b9fe9182863758118c8961ebb74281098b42894c99b58d30b2526"},
+    {file = "singlestoredb-0.6.1.tar.gz", hash = "sha256:2e00f4cd869dc1ecf33df853c521ebd6ce913af2bf3b2f98675ffa3dc6911636"},
+]
+
+[package.dependencies]
+build = "*"
+PyJWT = "*"
+requests = "*"
+sqlparams = "*"
+wheel = "*"
+
+[package.extras]
+dataframe = ["ibis-singlestoredb"]
+dbt = ["dbt-singlestore"]
+ed22519 = ["PyNaCl (>=1.4.0)"]
+gssapi = ["gssapi"]
+ibis = ["ibis-singlestoredb"]
+kerberos = ["gssapi"]
+rsa = ["cryptography"]
+sqlalchemy = ["sqlalchemy-singlestoredb"]
+
 [[package]]
 name = "six"
 version = "1.16.0"
@@ -9137,6 +9209,18 @@ files = [
     {file = "sqlitedict-2.1.0.tar.gz", hash = "sha256:03d9cfb96d602996f1d4c2db2856f1224b96a9c431bdd16e78032a72940f9e8c"},
 ]

+[[package]]
+name = "sqlparams"
+version = "5.1.0"
+description = "Convert between various DB API 2.0 parameter styles."
+category = "main"
+optional = true
+python-versions = ">=3.7"
+files = [
+    {file = "sqlparams-5.1.0-py3-none-any.whl", hash = "sha256:ee4ef620a5197535e5ebb9217e2f453f08b044634b3d890f3d6701e4f838c85c"},
+    {file = "sqlparams-5.1.0.tar.gz", hash = "sha256:1abe87a0684567265b2b86f5a482d5c37db237c0268d4c81774ffedce4300199"},
+]
+
 [[package]]
 name = "srsly"
 version = "2.4.6"
@@ -9842,7 +9926,7 @@ files = [
 name = "tomli"
 version = "2.0.1"
 description = "A lil' TOML parser"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -11227,13 +11311,13 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\""}
 cffi = ["cffi (>=1.11)"]

 [extras]
-all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "lxml", "manifest-ml", "momento", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tigrisdb", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
-azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "openai"]
+all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "pymongo", "weaviate-client", "redis", "google-api-python-client", "google-auth", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "langkit", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "steamship", "pdfminer-six", "lxml", "requests-toolbelt", "neo4j", "openlm", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "momento", "singlestoredb", "tigrisdb"]
+azure = ["azure-identity", "azure-cosmos", "openai", "azure-core", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech"]
 cohere = ["cohere"]
 docarray = ["docarray"]
 embeddings = ["sentence-transformers"]
-extended-testing = ["atlassian-python-api", "beautifulsoup4", "beautifulsoup4", "bibtexparser", "chardet", "gql", "html2text", "jq", "lxml", "pandas", "pdfminer-six", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "requests-toolbelt", "scikit-learn", "telethon", "tqdm", "zep-python"]
-llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
+extended-testing = ["beautifulsoup4", "bibtexparser", "chardet", "jq", "pdfminer-six", "pypdf", "pymupdf", "pypdfium2", "tqdm", "lxml", "atlassian-python-api", "beautifulsoup4", "pandas", "telethon", "psychicapi", "zep-python", "gql", "requests-toolbelt", "html2text", "py-trello", "scikit-learn", "pyspark"]
+llms = ["anthropic", "cohere", "openai", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
 openai = ["openai", "tiktoken"]
 qdrant = ["qdrant-client"]
 text-helpers = ["chardet"]
@@ -11241,4 +11325,4 @@ text-helpers = ["chardet"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "8c0ab1bdc8b506e38e6fa4cba40dcf2df47473212d47fa1086c6aae8ddf2c021"
+content-hash = "faeb3cc6feb059096a66ba8b1fd2271cd91e3a9553cb4f05e5ea493610ac3763"
pyproject.toml
@@ -101,10 +101,12 @@ azure-cognitiveservices-speech = {version = "^1.28.0", optional = true}
 py-trello = {version = "^0.19.0", optional = true}
 momento = {version = "^1.5.0", optional = true}
 bibtexparser = {version = "^1.4.0", optional = true}
+singlestoredb = {version = "^0.6.1", optional = true}
 pyspark = {version = "^3.4.0", optional = true}
 tigrisdb = {version = "^1.0.0b6", optional = true}
 langchainplus-sdk = ">=0.0.6"


 [tool.poetry.group.docs.dependencies]
 autodoc_pydantic = "^1.8.0"
 myst_parser = "^0.18.1"
@@ -280,6 +282,7 @@ all = [
     "azure-ai-vision",
     "azure-cognitiveservices-speech",
     "momento",
+    "singlestoredb",
     "tigrisdb"
 ]
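Since `singlestoredb` is declared as an optional extra, a plain `pip install langchain` does not pull it in; users trying the new store install the connector alongside, just as the notebook's first cell does:

    pip install singlestoredb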
tests/integration_tests/vectorstores/test_singlestoredb.py (new file, 142 lines)
@@ -0,0 +1,142 @@
"""Test SingleStoreDB functionality."""
from typing import List

import numpy as np
import pytest

from langchain.docstore.document import Document
from langchain.vectorstores.singlestoredb import SingleStoreDB
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

TEST_SINGLESTOREDB_URL = "root:pass@localhost:3306/db"
TEST_SINGLE_RESULT = [Document(page_content="foo")]
TEST_SINGLE_WITH_METADATA_RESULT = [Document(page_content="foo", metadata={"a": "b"})]
TEST_RESULT = [Document(page_content="foo"), Document(page_content="foo")]

try:
    import singlestoredb as s2

    singlestoredb_installed = True
except ImportError:
    singlestoredb_installed = False


def drop(table_name: str) -> None:
    with s2.connect(TEST_SINGLESTOREDB_URL) as conn:
        conn.autocommit(True)
        with conn.cursor() as cursor:
            cursor.execute(f"DROP TABLE IF EXISTS {table_name};")


class NormalizedFakeEmbeddings(FakeEmbeddings):
    """Fake embeddings with normalization. For testing purposes."""

    def normalize(self, vector: List[float]) -> List[float]:
        """Normalize vector."""
        return [float(v / np.linalg.norm(vector)) for v in vector]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.normalize(v) for v in super().embed_documents(texts)]

    def embed_query(self, text: str) -> List[float]:
        return self.normalize(super().embed_query(text))


@pytest.fixture
def texts() -> List[str]:
    return ["foo", "bar", "baz"]


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb(texts: List[str]) -> None:
    """Test end to end construction and search."""
    table_name = "test_singlestoredb"
    drop(table_name)
    docsearch = SingleStoreDB.from_texts(
        texts,
        NormalizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == TEST_SINGLE_RESULT
    drop(table_name)


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_new_vector(texts: List[str]) -> None:
    """Test adding a new document."""
    table_name = "test_singlestoredb_new_vector"
    drop(table_name)
    docsearch = SingleStoreDB.from_texts(
        texts,
        NormalizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    docsearch.add_texts(["foo"])
    output = docsearch.similarity_search("foo", k=2)
    assert output == TEST_RESULT
    drop(table_name)


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_from_existing(texts: List[str]) -> None:
    """Test creating a vector store from an existing table."""
    table_name = "test_singlestoredb_from_existing"
    drop(table_name)
    SingleStoreDB.from_texts(
        texts,
        NormalizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    # Test creating from an existing table.
    docsearch2 = SingleStoreDB(
        NormalizedFakeEmbeddings(),
        table_name="test_singlestoredb_from_existing",
        host=TEST_SINGLESTOREDB_URL,
    )
    output = docsearch2.similarity_search("foo", k=1)
    assert output == TEST_SINGLE_RESULT
    drop(table_name)


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_from_documents(texts: List[str]) -> None:
    """Test from_documents constructor."""
    table_name = "test_singlestoredb_from_documents"
    drop(table_name)
    docs = [Document(page_content=t, metadata={"a": "b"}) for t in texts]
    docsearch = SingleStoreDB.from_documents(
        docs,
        NormalizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == TEST_SINGLE_WITH_METADATA_RESULT
    drop(table_name)


@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
def test_singlestoredb_add_texts_to_existing(texts: List[str]) -> None:
    """Test adding texts to an existing table."""
    table_name = "test_singlestoredb_add_texts_to_existing"
    drop(table_name)
    SingleStoreDB.from_texts(
        texts,
        NormalizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    docsearch = SingleStoreDB(
        NormalizedFakeEmbeddings(),
        table_name=table_name,
        host=TEST_SINGLESTOREDB_URL,
    )
    docsearch.add_texts(["foo"])
    output = docsearch.similarity_search("foo", k=2)
    assert output == TEST_RESULT
    drop(table_name)
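Exercising these tests locally requires a SingleStoreDB instance reachable at the `TEST_SINGLESTOREDB_URL` defined above; a hypothetical invocation from the repository root:

    pip install singlestoredb
    pytest tests/integration_tests/vectorstores/test_singlestoredb.py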