community[patch]: add hybrid search to singlestoredb vectorstore (#20793)

Implemented the ability to enable full-text search within the SingleStore vector store, offering users a versatile range of search strategies. This enhancement allows users to seamlessly combine full-text search with vector search, enabling the following search strategies: * Search solely by vector similarity. * Conduct searches exclusively based on text similarity, utilizing Lucene internally. * Filter search results by text similarity score, with the option to specify a threshold, followed by a search based on vector similarity. * Filter results by vector similarity score before conducting a search based on text similarity. * Perform searches using a weighted sum of vector and text similarity scores. Additionally, integration tests have been added to comprehensively cover all scenarios. Updated notebook with examples. CC: @baskaryan, @hwchase17 --------- Co-authored-by: Volodymyr Tkachuk <vtkachuk-ua@singlestore.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-09-13 21:47:12 +00:00 · 2024-04-25 00:34:50 +03:00
parent 9efab3ed66
commit 493afe4d8d
3 changed files with 879 additions and 53 deletions
--- a/libs/community/langchain_community/vectorstores/singlestoredb.py
+++ b/libs/community/langchain_community/vectorstores/singlestoredb.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 import json
 import re
+from enum import Enum
 from typing import (
    Any,
    Callable,
@@ -38,6 +39,15 @@ class SingleStoreDB(VectorStore):
    optionally, the names of the table and the fields to use.
    """

+    class SearchStrategy(str, Enum):
+        """Enumeration of the search strategies supported by the vectorstore."""
+
+        VECTOR_ONLY = "VECTOR_ONLY"
+        TEXT_ONLY = "TEXT_ONLY"
+        FILTER_BY_TEXT = "FILTER_BY_TEXT"
+        FILTER_BY_VECTOR = "FILTER_BY_VECTOR"
+        WEIGHTED_SUM = "WEIGHTED_SUM"
+
    def _get_connection(self: SingleStoreDB) -> Any:
        try:
            import singlestoredb as s2
@@ -57,10 +67,12 @@ class SingleStoreDB(VectorStore):
        content_field: str = "content",
        metadata_field: str = "metadata",
        vector_field: str = "vector",
+        id_field: str = "id",
        use_vector_index: bool = False,
        vector_index_name: str = "",
        vector_index_options: Optional[dict] = None,
        vector_size: int = 1536,
+        use_full_text_search: bool = False,
        pool_size: int = 5,
        max_overflow: int = 10,
        timeout: float = 30,
@@ -81,7 +93,8 @@ class SingleStoreDB(VectorStore):
                - EUCLIDEAN_DISTANCE: Computes the Euclidean distance between
                    two vectors. This metric considers the geometric distance in
                    the vector space, and might be more suitable for embeddings
-                    that rely on spatial relationships.
+                    that rely on spatial relationships. This metric is not
+                    compatible with the WEIGHTED_SUM search strategy.

            table_name (str, optional): Specifies the name of the table in use.
                Defaults to "embeddings".
@@ -91,6 +104,8 @@ class SingleStoreDB(VectorStore):
                Defaults to "metadata".
            vector_field (str, optional): Specifies the field to store the vector.
                Defaults to "vector".
+            id_field (str, optional): Specifies the field to store the id.
+                Defaults to "id".

            use_vector_index (bool, optional): Toggles the use of a vector index.
                Works only with SingleStoreDB 8.5 or later. Defaults to False.
@@ -113,6 +128,14 @@ class SingleStoreDB(VectorStore):
                Should be set to the same value as the size of the vectors
                stored in the vector_field.

+            use_full_text_search (bool, optional): Toggles the use of a full-text index
+                on the document content. Defaults to False. If set to True, the table
+                will be created with a full-text index on the content field,
+                and the similarity_search method will allow using TEXT_ONLY,
+                FILTER_BY_TEXT, FILTER_BY_VECTOR, and WEIGHTED_SUM search strategies.
+                If set to False, the similarity_search method will only allow
+                the VECTOR_ONLY search strategy.
+
            Following arguments pertain to the connection pool:

            pool_size (int, optional): Determines the number of active connections in
@@ -165,7 +188,7 @@ class SingleStoreDB(VectorStore):

            .. code-block:: python

-                from langchain_community.embeddings import OpenAIEmbeddings
+                from langchain_openai import OpenAIEmbeddings
                from langchain_community.vectorstores import SingleStoreDB

                vectorstore = SingleStoreDB(
@@ -177,7 +200,7 @@ class SingleStoreDB(VectorStore):

            .. code-block:: python

-                from langchain_community.embeddings import OpenAIEmbeddings
+                from langchain_openai import OpenAIEmbeddings
                from langchain_community.vectorstores import SingleStoreDB

                vectorstore = SingleStoreDB(
@@ -197,7 +220,7 @@ class SingleStoreDB(VectorStore):

            .. code-block:: python

-                from langchain_community.embeddings import OpenAIEmbeddings
+                from langchain_openai import OpenAIEmbeddings
                from langchain_community.vectorstores import SingleStoreDB

                os.environ['SINGLESTOREDB_URL'] = 'me:p455w0rd@s2-host.com/my_db'
@@ -207,7 +230,7 @@ class SingleStoreDB(VectorStore):

            .. code-block:: python

-                from langchain_community.embeddings import OpenAIEmbeddings
+                from langchain_openai import OpenAIEmbeddings
                from langchain_community.vectorstores import SingleStoreDB

                os.environ['SINGLESTOREDB_URL'] = 'me:p455w0rd@s2-host.com/my_db'
@@ -215,6 +238,18 @@ class SingleStoreDB(VectorStore):
                    OpenAIEmbeddings(),
                    use_vector_index=True,
                )
+
+            Using full-text index:
+
+            .. code-block:: python
+                from langchain_openai import OpenAIEmbeddings
+                from langchain_community.vectorstores import SingleStoreDB
+
+                os.environ['SINGLESTOREDB_URL'] = 'me:p455w0rd@s2-host.com/my_db'
+                vectorstore = SingleStoreDB(
+                    OpenAIEmbeddings(),
+                    use_full_text_search=True,
+                )
        """

        self.embedding = embedding
@@ -223,6 +258,7 @@ class SingleStoreDB(VectorStore):
        self.content_field = self._sanitize_input(content_field)
        self.metadata_field = self._sanitize_input(metadata_field)
        self.vector_field = self._sanitize_input(vector_field)
+        self.id_field = self._sanitize_input(id_field)

        self.use_vector_index = bool(use_vector_index)
        self.vector_index_name = self._sanitize_input(vector_index_name)
@@ -230,6 +266,8 @@ class SingleStoreDB(VectorStore):
        self.vector_index_options["metric_type"] = self.distance_strategy
        self.vector_size = int(vector_size)

+        self.use_full_text_search = bool(use_full_text_search)
+
        # Pass the rest of the kwargs to the connection.
        self.connection_kwargs = kwargs

@@ -238,7 +276,7 @@ class SingleStoreDB(VectorStore):
            self.connection_kwargs["conn_attrs"] = dict()

        self.connection_kwargs["conn_attrs"]["_connector_name"] = "langchain python sdk"
-        self.connection_kwargs["conn_attrs"]["_connector_version"] = "1.0.2"
+        self.connection_kwargs["conn_attrs"]["_connector_version"] = "2.0.0"

        # Create connection pool.
        self.connection_pool = QueuePool(
@@ -266,6 +304,9 @@ class SingleStoreDB(VectorStore):
        try:
            cur = conn.cursor()
            try:
+                full_text_index = ""
+                if self.use_full_text_search:
+                    full_text_index = ", FULLTEXT({})".format(self.content_field)
                if self.use_vector_index:
                    index_options = ""
                    if self.vector_index_options and len(self.vector_index_options) > 0:
@@ -274,10 +315,11 @@ class SingleStoreDB(VectorStore):
                        )
                    cur.execute(
                        """CREATE TABLE IF NOT EXISTS {}
-                        ({} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
-                        {} VECTOR({}, F32) NOT NULL, {} JSON,
-                        VECTOR INDEX {} ({}) {});""".format(
+                        ({} BIGINT AUTO_INCREMENT PRIMARY KEY, {} LONGTEXT CHARACTER
+                        SET utf8mb4 COLLATE utf8mb4_general_ci, {} VECTOR({}, F32)
+                        NOT NULL, {} JSON, VECTOR INDEX {} ({}) {}{});""".format(
                            self.table_name,
+                            self.id_field,
                            self.content_field,
                            self.vector_field,
                            self.vector_size,
@@ -285,17 +327,21 @@ class SingleStoreDB(VectorStore):
                            self.vector_index_name,
                            self.vector_field,
                            index_options,
+                            full_text_index,
                        ),
                    )
                else:
                    cur.execute(
                        """CREATE TABLE IF NOT EXISTS {}
-                        ({} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
-                        {} BLOB, {} JSON);""".format(
+                        ({} BIGINT AUTO_INCREMENT PRIMARY KEY, {} LONGTEXT CHARACTER
+                        SET utf8mb4 COLLATE utf8mb4_general_ci, {} BLOB, {} JSON{});
+                        """.format(
                            self.table_name,
+                            self.id_field,
                            self.content_field,
                            self.vector_field,
                            self.metadata_field,
+                            full_text_index,
                        ),
                    )
            finally:
@@ -365,8 +411,12 @@ class SingleStoreDB(VectorStore):
                        else self.embedding.embed_documents([text])[0]
                    )
                    cur.execute(
-                        "INSERT INTO {} VALUES (%s, JSON_ARRAY_PACK(%s), %s)".format(
-                            self.table_name
+                        """INSERT INTO {}({}, {}, {})
+                        VALUES (%s, JSON_ARRAY_PACK(%s), %s)""".format(
+                            self.table_name,
+                            self.content_field,
+                            self.vector_field,
+                            self.metadata_field,
                        ),
                        (
                            text,
@@ -374,7 +424,7 @@ class SingleStoreDB(VectorStore):
                            json.dumps(metadata),
                        ),
                    )
-                if self.use_vector_index:
+                if self.use_vector_index or self.use_full_text_search:
                    cur.execute("OPTIMIZE TABLE {} FLUSH;".format(self.table_name))
            finally:
                cur.close()
@@ -383,7 +433,16 @@ class SingleStoreDB(VectorStore):
        return []

    def similarity_search(
-        self, query: str, k: int = 4, filter: Optional[dict] = None, **kwargs: Any
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[dict] = None,
+        search_strategy: SearchStrategy = SearchStrategy.VECTOR_ONLY,
+        filter_threshold: float = 0,
+        text_weight: float = 0.5,
+        vector_weight: float = 0.5,
+        vector_select_count_multiplier: int = 10,
+        **kwargs: Any,
    ) -> List[Document]:
        """Returns the most similar indexed documents to the query text.

@@ -393,29 +452,119 @@ class SingleStoreDB(VectorStore):
            query (str): The query text for which to find similar documents.
            k (int): The number of documents to return. Default is 4.
            filter (dict): A dictionary of metadata fields and values to filter by.
+                Default is None.
+            search_strategy (SearchStrategy): The search strategy to use.
+                Default is SearchStrategy.VECTOR_ONLY.
+                Available options are:
+                - SearchStrategy.VECTOR_ONLY: Searches only by vector similarity.
+                - SearchStrategy.TEXT_ONLY: Searches only by text similarity. This
+                    option is only available if use_full_text_search is True.
+                - SearchStrategy.FILTER_BY_TEXT: Filters by text similarity and
+                    searches by vector similarity. This option is only available if
+                    use_full_text_search is True.
+                - SearchStrategy.FILTER_BY_VECTOR: Filters by vector similarity and
+                    searches by text similarity. This option is only available if
+                    use_full_text_search is True.
+                - SearchStrategy.WEIGHTED_SUM: Searches by a weighted sum of text and
+                    vector similarity. This option is only available if
+                    use_full_text_search is True and distance_strategy is DOT_PRODUCT.
+            filter_threshold (float): The threshold for filtering by text or vector
+                similarity. Default is 0. This option has effect only if search_strategy
+                is SearchStrategy.FILTER_BY_TEXT or SearchStrategy.FILTER_BY_VECTOR.
+            text_weight (float): The weight of text similarity in the weighted sum
+                search strategy. Default is 0.5. This option has effect only if
+                search_strategy is SearchStrategy.WEIGHTED_SUM.
+            vector_weight (float): The weight of vector similarity in the weighted sum
+                search strategy. Default is 0.5. This option has effect only if
+                search_strategy is SearchStrategy.WEIGHTED_SUM.
+            vector_select_count_multiplier (int): The multiplier for the number of
+                vectors to select when using the vector index. Default is 10.
+                This parameter has effect only if use_vector_index is True and
+                search_strategy is SearchStrategy.WEIGHTED_SUM or
+                SearchStrategy.FILTER_BY_TEXT.
+                The number of vectors selected will
+                be k * vector_select_count_multiplier.
+                This is needed due to the limitations of the vector index.
+

        Returns:
            List[Document]: A list of documents that are most similar to the query text.

        Examples:
+
+            Basic Usage:
            .. code-block:: python
+
                from langchain_community.vectorstores import SingleStoreDB
-                from langchain_community.embeddings import OpenAIEmbeddings
+                from langchain_openai import OpenAIEmbeddings
+
                s2 = SingleStoreDB.from_documents(
                    docs,
                    OpenAIEmbeddings(),
                    host="username:password@localhost:3306/database"
                )
-                s2.similarity_search("query text", 1,
-                    {"metadata_field": "metadata_value"})
+                results = s2.similarity_search("query text", 1,
+                                    {"metadata_field": "metadata_value"})
+
+            Different Search Strategies:
+            .. code-block:: python
+
+                from langchain_community.vectorstores import SingleStoreDB
+                from langchain_openai import OpenAIEmbeddings
+
+                s2 = SingleStoreDB.from_documents(
+                    docs,
+                    OpenAIEmbeddings(),
+                    host="username:password@localhost:3306/database",
+                    use_full_text_search=True,
+                    use_vector_index=True,
+                )
+                results = s2.similarity_search("query text", 1,
+                        search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_TEXT,
+                        filter_threshold=0.5)
+
+            Weighted Sum Search Strategy:
+            .. code-block:: python
+
+                from langchain_community.vectorstores import SingleStoreDB
+                from langchain_openai import OpenAIEmbeddings
+
+                s2 = SingleStoreDB.from_documents(
+                    docs,
+                    OpenAIEmbeddings(),
+                    host="username:password@localhost:3306/database",
+                    use_full_text_search=True,
+                    use_vector_index=True,
+                )
+                results = s2.similarity_search("query text", 1,
+                    search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,
+                    text_weight=0.3,
+                    vector_weight=0.7)
        """
        docs_and_scores = self.similarity_search_with_score(
-            query=query, k=k, filter=filter
+            query=query,
+            k=k,
+            filter=filter,
+            search_strategy=search_strategy,
+            filter_threshold=filter_threshold,
+            text_weight=text_weight,
+            vector_weight=vector_weight,
+            vector_select_count_multiplier=vector_select_count_multiplier,
+            **kwargs,
        )
        return [doc for doc, _ in docs_and_scores]

    def similarity_search_with_score(
-        self, query: str, k: int = 4, filter: Optional[dict] = None
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[dict] = None,
+        search_strategy: SearchStrategy = SearchStrategy.VECTOR_ONLY,
+        filter_threshold: float = 1,
+        text_weight: float = 0.5,
+        vector_weight: float = 0.5,
+        vector_select_count_multiplier: int = 10,
+        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query. Uses cosine similarity.

@@ -424,20 +573,156 @@ class SingleStoreDB(VectorStore):
            k: Number of Documents to return. Defaults to 4.
            filter: A dictionary of metadata fields and values to filter by.
                    Defaults to None.
-
+            search_strategy (SearchStrategy): The search strategy to use.
+                Default is SearchStrategy.VECTOR_ONLY.
+                Available options are:
+                - SearchStrategy.VECTOR_ONLY: Searches only by vector similarity.
+                - SearchStrategy.TEXT_ONLY: Searches only by text similarity. This
+                    option is only available if use_full_text_search is True.
+                - SearchStrategy.FILTER_BY_TEXT: Filters by text similarity and
+                    searches by vector similarity. This option is only available if
+                    use_full_text_search is True.
+                - SearchStrategy.FILTER_BY_VECTOR: Filters by vector similarity and
+                    searches by text similarity. This option is only available if
+                    use_full_text_search is True.
+                - SearchStrategy.WEIGHTED_SUM: Searches by a weighted sum of text and
+                    vector similarity. This option is only available if
+                    use_full_text_search is True and distance_strategy is DOT_PRODUCT.
+            filter_threshold (float): The threshold for filtering by text or vector
+                similarity. Default is 1. This option has effect only if search_strategy
+                is SearchStrategy.FILTER_BY_TEXT or SearchStrategy.FILTER_BY_VECTOR.
+            text_weight (float): The weight of text similarity in the weighted sum
+                search strategy. Default is 0.5. This option has effect only if
+                search_strategy is SearchStrategy.WEIGHTED_SUM.
+            vector_weight (float): The weight of vector similarity in the weighted sum
+                search strategy. Default is 0.5. This option has effect only if
+                search_strategy is SearchStrategy.WEIGHTED_SUM.
+            vector_select_count_multiplier (int): The multiplier for the number of
+                vectors to select when using the vector index. Default is 10.
+                This parameter has effect only if use_vector_index is True and
+                search_strategy is SearchStrategy.WEIGHTED_SUM or
+                SearchStrategy.FILTER_BY_TEXT.
+                The number of vectors selected will
+                be k * vector_select_count_multiplier.
+                This is needed due to the limitations of the vector index.
        Returns:
            List of Documents most similar to the query and score for each
+            document.
+
+        Raises:
+            ValueError: If the search strategy requires full-text search that is
+                not enabled, or is not supported with the distance strategy.
+
+        Examples:
+            Basic Usage:
+            .. code-block:: python
+
+                from langchain_community.vectorstores import SingleStoreDB
+                from langchain_openai import OpenAIEmbeddings
+
+                s2 = SingleStoreDB.from_documents(
+                    docs,
+                    OpenAIEmbeddings(),
+                    host="username:password@localhost:3306/database"
+                )
+                results = s2.similarity_search_with_score("query text", 1,
+                                    {"metadata_field": "metadata_value"})
+
+            Different Search Strategies:
+
+            .. code-block:: python
+
+                from langchain_community.vectorstores import SingleStoreDB
+                from langchain_openai import OpenAIEmbeddings
+
+                s2 = SingleStoreDB.from_documents(
+                    docs,
+                    OpenAIEmbeddings(),
+                    host="username:password@localhost:3306/database",
+                    use_full_text_search=True,
+                    use_vector_index=True,
+                )
+                results = s2.similarity_search_with_score("query text", 1,
+                        search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,
+                        filter_threshold=0.5)
+
+            Weighted Sum Search Strategy:
+            .. code-block:: python
+
+                from langchain_community.vectorstores import SingleStoreDB
+                from langchain_openai import OpenAIEmbeddings
+
+                s2 = SingleStoreDB.from_documents(
+                    docs,
+                    OpenAIEmbeddings(),
+                    host="username:password@localhost:3306/database",
+                    use_full_text_search=True,
+                    use_vector_index=True,
+                )
+                results = s2.similarity_search_with_score("query text", 1,
+                    search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,
+                    text_weight=0.3,
+                    vector_weight=0.7)
        """
+
+        if (
+            search_strategy != SingleStoreDB.SearchStrategy.VECTOR_ONLY
+            and not self.use_full_text_search
+        ):
+            raise ValueError(
+                """Search strategy {} is not supported
+                when use_full_text_search is False""".format(search_strategy)
+            )
+
+        if (
+            search_strategy == SingleStoreDB.SearchStrategy.WEIGHTED_SUM
+            and self.distance_strategy != DistanceStrategy.DOT_PRODUCT
+        ):
+            raise ValueError(
+                "Search strategy {} is not supported with distance strategy {}".format(
+                    search_strategy, self.distance_strategy
+                )
+            )
+
        # Creates embedding vector from user query
-        embedding = self.embedding.embed_query(query)
+        embedding = []
+        if search_strategy != SingleStoreDB.SearchStrategy.TEXT_ONLY:
+            embedding = self.embedding.embed_query(query)
+
+        self.embedding.embed_query(query)
        conn = self.connection_pool.connect()
        result = []
        where_clause: str = ""
        where_clause_values: List[Any] = []
-        if filter:
+        if filter or search_strategy in [
+            SingleStoreDB.SearchStrategy.FILTER_BY_TEXT,
+            SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,
+        ]:
            where_clause = "WHERE "
            arguments = []

+            if search_strategy == SingleStoreDB.SearchStrategy.FILTER_BY_TEXT:
+                arguments.append(
+                    "MATCH ({}) AGAINST (%s) > %s".format(self.content_field)
+                )
+                where_clause_values.append(query)
+                where_clause_values.append(float(filter_threshold))
+
+            if search_strategy == SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR:
+                condition = "{}({}, JSON_ARRAY_PACK(%s)) ".format(
+                    self.distance_strategy.name
+                    if isinstance(self.distance_strategy, DistanceStrategy)
+                    else self.distance_strategy,
+                    self.vector_field,
+                )
+                if self.distance_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE:
+                    condition += "< %s"
+                else:
+                    condition += "> %s"
+                arguments.append(condition)
+                where_clause_values.append("[{}]".format(",".join(map(str, embedding))))
+                where_clause_values.append(float(filter_threshold))
+
            def build_where_clause(
                where_clause_values: List[Any],
                sub_filter: dict,
@@ -459,29 +744,98 @@ class SingleStoreDB(VectorStore):
                        where_clause_values += prefix_args + [key]
                        where_clause_values.append(json.dumps(sub_filter[key]))

-            build_where_clause(where_clause_values, filter)
+            if filter:
+                build_where_clause(where_clause_values, filter)
            where_clause += " AND ".join(arguments)

        try:
            cur = conn.cursor()
            try:
-                cur.execute(
-                    """SELECT {}, {}, {}({}, JSON_ARRAY_PACK(%s)) as __score
-                    FROM {} {} ORDER BY __score {} LIMIT %s""".format(
-                        self.content_field,
-                        self.metadata_field,
-                        self.distance_strategy.name
-                        if isinstance(self.distance_strategy, DistanceStrategy)
-                        else self.distance_strategy,
-                        self.vector_field,
-                        self.table_name,
-                        where_clause,
-                        ORDERING_DIRECTIVE[self.distance_strategy],
-                    ),
-                    ("[{}]".format(",".join(map(str, embedding))),)
-                    + tuple(where_clause_values)
-                    + (k,),
-                )
+                if (
+                    search_strategy == SingleStoreDB.SearchStrategy.VECTOR_ONLY
+                    or search_strategy == SingleStoreDB.SearchStrategy.FILTER_BY_TEXT
+                ):
+                    search_options = ""
+                    if (
+                        self.use_vector_index
+                        and search_strategy
+                        == SingleStoreDB.SearchStrategy.FILTER_BY_TEXT
+                    ):
+                        search_options = "SEARCH_OPTIONS '{\"k\":%d}'" % (
+                            k * vector_select_count_multiplier
+                        )
+                    cur.execute(
+                        """SELECT {}, {}, {}({}, JSON_ARRAY_PACK(%s)) as __score
+                        FROM {} {} ORDER BY __score {}{} LIMIT %s""".format(
+                            self.content_field,
+                            self.metadata_field,
+                            self.distance_strategy.name
+                            if isinstance(self.distance_strategy, DistanceStrategy)
+                            else self.distance_strategy,
+                            self.vector_field,
+                            self.table_name,
+                            where_clause,
+                            search_options,
+                            ORDERING_DIRECTIVE[self.distance_strategy],
+                        ),
+                        ("[{}]".format(",".join(map(str, embedding))),)
+                        + tuple(where_clause_values)
+                        + (k,),
+                    )
+                elif (
+                    search_strategy == SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR
+                    or search_strategy == SingleStoreDB.SearchStrategy.TEXT_ONLY
+                ):
+                    cur.execute(
+                        """SELECT {}, {}, MATCH ({}) AGAINST (%s) as __score
+                        FROM {} {} ORDER BY __score DESC LIMIT %s""".format(
+                            self.content_field,
+                            self.metadata_field,
+                            self.content_field,
+                            self.table_name,
+                            where_clause,
+                        ),
+                        (query,) + tuple(where_clause_values) + (k,),
+                    )
+                elif search_strategy == SingleStoreDB.SearchStrategy.WEIGHTED_SUM:
+                    cur.execute(
+                        """SELECT {}, {}, __score1 * %s + __score2 * %s as __score
+                        FROM (
+                            SELECT {}, {}, {}, MATCH ({}) AGAINST (%s) as __score1 
+                        FROM {} {}) r1 FULL OUTER JOIN (
+                            SELECT {}, {}({}, JSON_ARRAY_PACK(%s)) as __score2
+                            FROM {} {} ORDER BY __score2 {} LIMIT %s
+                        ) r2 ON r1.{} = r2.{} ORDER BY __score {} LIMIT %s""".format(
+                            self.content_field,
+                            self.metadata_field,
+                            self.id_field,
+                            self.content_field,
+                            self.metadata_field,
+                            self.content_field,
+                            self.table_name,
+                            where_clause,
+                            self.id_field,
+                            self.distance_strategy.name
+                            if isinstance(self.distance_strategy, DistanceStrategy)
+                            else self.distance_strategy,
+                            self.vector_field,
+                            self.table_name,
+                            where_clause,
+                            ORDERING_DIRECTIVE[self.distance_strategy],
+                            self.id_field,
+                            self.id_field,
+                            ORDERING_DIRECTIVE[self.distance_strategy],
+                        ),
+                        (text_weight, vector_weight, query)
+                        + tuple(where_clause_values)
+                        + ("[{}]".format(",".join(map(str, embedding))),)
+                        + tuple(where_clause_values)
+                        + (k * vector_select_count_multiplier, k),
+                    )
+                else:
+                    raise ValueError(
+                        "Invalid search strategy: {}".format(search_strategy)
+                    )

                for row in cur.fetchall():
                    doc = Document(page_content=row[0], metadata=row[1])
@@ -503,10 +857,12 @@ class SingleStoreDB(VectorStore):
        content_field: str = "content",
        metadata_field: str = "metadata",
        vector_field: str = "vector",
+        id_field: str = "id",
        use_vector_index: bool = False,
        vector_index_name: str = "",
        vector_index_options: Optional[dict] = None,
        vector_size: int = 1536,
+        use_full_text_search: bool = False,
        pool_size: int = 5,
        max_overflow: int = 10,
        timeout: float = 30,
@@ -518,10 +874,99 @@ class SingleStoreDB(VectorStore):
            2. Creates a new table for the embeddings in SingleStoreDB.
            3. Adds the documents to the newly created table.
        This is intended to be a quick way to get started.
+        Args:
+            texts (List[str]): List of texts to add to the vectorstore.
+            embedding (Embeddings): A text embedding model.
+            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
+                Defaults to None.
+            distance_strategy (DistanceStrategy, optional):
+                Determines the strategy employed for calculating
+                the distance between vectors in the embedding space.
+                Defaults to DOT_PRODUCT.
+                Available options are:
+                - DOT_PRODUCT: Computes the scalar product of two vectors.
+                    This is the default behavior
+                - EUCLIDEAN_DISTANCE: Computes the Euclidean distance between
+                    two vectors. This metric considers the geometric distance in
+                    the vector space, and might be more suitable for embeddings
+                    that rely on spatial relationships. This metric is not
+                    compatible with the WEIGHTED_SUM search strategy.
+            table_name (str, optional): Specifies the name of the table in use.
+                Defaults to "embeddings".
+            content_field (str, optional): Specifies the field to store the content.
+                Defaults to "content".
+            metadata_field (str, optional): Specifies the field to store metadata.
+                Defaults to "metadata".
+            vector_field (str, optional): Specifies the field to store the vector.
+                Defaults to "vector".
+            id_field (str, optional): Specifies the field to store the id.
+                Defaults to "id".
+            use_vector_index (bool, optional): Toggles the use of a vector index.
+                Works only with SingleStoreDB 8.5 or later. Defaults to False.
+                If set to True, vector_size parameter is required to be set to
+                a proper value.
+            vector_index_name (str, optional): Specifies the name of the vector index.
+                Defaults to empty. Will be ignored if use_vector_index is set to False.
+            vector_index_options (dict, optional): Specifies the options for
+                the vector index. Defaults to {}.
+                Will be ignored if use_vector_index is set to False. The options are:
+                index_type (str, optional): Specifies the type of the index.
+                    Defaults to IVF_PQFS.
+                For more options, please refer to the SingleStoreDB documentation:
+                https://docs.singlestore.com/cloud/reference/sql-reference/vector-functions/vector-indexing/
+            vector_size (int, optional): Specifies the size of the vector.
+                Defaults to 1536. Required if use_vector_index is set to True.
+                Should be set to the same value as the size of the vectors
+                stored in the vector_field.
+            use_full_text_search (bool, optional): Toggles the use of a full-text index
+                on the document content. Defaults to False. If set to True, the table
+                will be created with a full-text index on the content field,
+                and the similarity_search method will allow using TEXT_ONLY,
+                FILTER_BY_TEXT, FILTER_BY_VECTOR, and WEIGHTED_SUM search strategies.
+                If set to False, the similarity_search method will only allow
+                the VECTOR_ONLY search strategy.
+
+            pool_size (int, optional): Determines the number of active connections in
+                the pool. Defaults to 5.
+            max_overflow (int, optional): Determines the maximum number of connections
+                allowed beyond the pool_size. Defaults to 10.
+            timeout (float, optional): Specifies the maximum wait time in seconds for
+                establishing a connection. Defaults to 30.
+
+            Additional optional arguments provide further customization over the
+            database connection:
+
+            pure_python (bool, optional): Toggles the connector mode. If True,
+                operates in pure Python mode.
+            local_infile (bool, optional): Allows local file uploads.
+            charset (str, optional): Specifies the character set for string values.
+            ssl_key (str, optional): Specifies the path of the file containing the SSL
+                key.
+            ssl_cert (str, optional): Specifies the path of the file containing the SSL
+                certificate.
+            ssl_ca (str, optional): Specifies the path of the file containing the SSL
+                certificate authority.
+            ssl_cipher (str, optional): Sets the SSL cipher list.
+            ssl_disabled (bool, optional): Disables SSL usage.
+            ssl_verify_cert (bool, optional): Verifies the server's certificate.
+                Automatically enabled if ``ssl_ca`` is specified.
+            ssl_verify_identity (bool, optional): Verifies the server's identity.
+            conv (dict[int, Callable], optional): A dictionary of data conversion
+                functions.
+            credential_type (str, optional): Specifies the type of authentication to
+                use: auth.PASSWORD, auth.JWT, or auth.BROWSER_SSO.
+            autocommit (bool, optional): Enables autocommits.
+            results_type (str, optional): Determines the structure of the query results:
+                tuples, namedtuples, dicts.
+            results_format (str, optional): Deprecated. This option has been renamed to
+                results_type.
+
        Example:
            .. code-block:: python
+
                from langchain_community.vectorstores import SingleStoreDB
-                from langchain_community.embeddings import OpenAIEmbeddings
+                from langchain_openai import OpenAIEmbeddings
+
                s2 = SingleStoreDB.from_texts(
                    texts,
                    OpenAIEmbeddings(),
@@ -536,6 +981,7 @@ class SingleStoreDB(VectorStore):
            content_field=content_field,
            metadata_field=metadata_field,
            vector_field=vector_field,
+            id_field=id_field,
            pool_size=pool_size,
            max_overflow=max_overflow,
            timeout=timeout,
@@ -543,6 +989,7 @@ class SingleStoreDB(VectorStore):
            vector_index_name=vector_index_name,
            vector_index_options=vector_index_options,
            vector_size=vector_size,
+            use_full_text_search=use_full_text_search,
            **kwargs,
        )
        instance.add_texts(texts, metadatas, embedding.embed_documents(texts), **kwargs)
--- a/libs/community/tests/integration_tests/vectorstores/test_singlestoredb.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_singlestoredb.py
@@ -1,4 +1,5 @@
 """Test SingleStoreDB functionality."""
+import math
 import os
 import tempfile
 from typing import List
@@ -67,11 +68,76 @@ class RandomEmbeddings(Embeddings):
        return [np.random.rand(100).tolist() for _ in uris]


+class IncrementalEmbeddings(Embeddings):
+    """Fake embeddings with incremental vectors. For testing purposes."""
+
+    def __init__(self) -> None:
+        self.counter = 0  # number of vectors produced so far
+
+    def set_counter(self, counter: int) -> None:
+        self.counter = counter  # overwrite the position in the vector sequence
+
+    def embed_query(self, text: str) -> List[float]:
+        self.counter += 1  # `text` is ignored; only the call count matters
+        return [  # 2-D unit vector at angle counter * pi / 10 (18 degrees per call)
+            math.cos(self.counter * math.pi / 10),
+            math.sin(self.counter * math.pi / 10),
+        ]
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return [self.embed_query(text) for text in texts]  # one step per text
+
+
@pytest.fixture
 def texts() -> List[str]:
    return ["foo", "bar", "baz"]


+@pytest.fixture
+def snow_rain_docs() -> List[Document]:
+    return [
+        Document(
+            page_content="""In the parched desert, a sudden rainstorm brought relief,
+            as the droplets danced upon the thirsty earth, rejuvenating the landscape
+            with the sweet scent of petrichor.""",
+            metadata={"count": "1", "category": "rain", "group": "a"},
+        ),
+        Document(
+            page_content="""Amidst the bustling cityscape, the rain fell relentlessly,
+            creating a symphony of pitter-patter on the pavement, while umbrellas
+            bloomed like colorful flowers in a sea of gray.""",
+            metadata={"count": "2", "category": "rain", "group": "a"},
+        ),
+        Document(
+            page_content="""High in the mountains, the rain transformed into a delicate
+            mist, enveloping the peaks in a mystical veil, where each droplet seemed to
+            whisper secrets to the ancient rocks below.""",
+            metadata={"count": "3", "category": "rain", "group": "b"},
+        ),
+        Document(
+            page_content="""Blanketing the countryside in a soft, pristine layer, the
+            snowfall painted a serene tableau, muffling the world in a tranquil hush
+            as delicate flakes settled upon the branches of trees like nature's own 
+            lacework.""",
+            metadata={"count": "1", "category": "snow", "group": "b"},
+        ),
+        Document(
+            page_content="""In the urban landscape, snow descended, transforming
+            bustling streets into a winter wonderland, where the laughter of
+            children echoed amidst the flurry of snowballs and the twinkle of
+            holiday lights.""",
+            metadata={"count": "2", "category": "snow", "group": "a"},
+        ),
+        Document(
+            page_content="""Atop the rugged peaks, snow fell with an unyielding
+            intensity, sculpting the landscape into a pristine alpine paradise,
+            where the frozen crystals shimmered under the moonlight, casting a
+            spell of enchantment over the wilderness below.""",
+            metadata={"count": "3", "category": "snow", "group": "a"},
+        ),
+    ]
+
+
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
 def test_singlestoredb(texts: List[str]) -> None:
    """Test end to end construction and search."""
@@ -505,3 +571,184 @@ def test_singestoredb_add_image2() -> None:
    output = docsearch.similarity_search("horse", k=1)
    assert "horse" in output[0].page_content
    drop(table_name)
+
+
+@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
+def test_singlestoredb_text_only_search(snow_rain_docs: List[Document]) -> None:
+    table_name = "test_singlestoredb_text_only_search"
+    drop(table_name)
+    docsearch = SingleStoreDB(
+        RandomEmbeddings(),  # random vectors; TEXT_ONLY presumably ignores them
+        table_name=table_name,
+        use_full_text_search=True,
+        host=TEST_SINGLESTOREDB_URL,
+    )
+    docsearch.add_documents(snow_rain_docs)
+    output = docsearch.similarity_search(
+        "rainstorm in parched desert",
+        k=3,
+        filter={"count": "1"},  # matches one rain doc and one snow doc in the fixture
+        search_strategy=SingleStoreDB.SearchStrategy.TEXT_ONLY,
+    )
+    assert len(output) == 2  # k=3, but only two docs carry count == "1"
+    assert (
+        "In the parched desert, a sudden rainstorm brought relief,"
+        in output[0].page_content
+    )
+    assert (
+        "Blanketing the countryside in a soft, pristine layer" in output[1].page_content
+    )
+
+    output = docsearch.similarity_search(
+        "snowfall in countryside",
+        k=3,
+        search_strategy=SingleStoreDB.SearchStrategy.TEXT_ONLY,
+    )
+    assert len(output) == 3  # no metadata filter here, so k=3 caps the result size
+    assert (
+        "Blanketing the countryside in a soft, pristine layer,"
+        in output[0].page_content
+    )
+    drop(table_name)
+
+
+@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
+def test_singlestoredb_filter_by_text_search(snow_rain_docs: List[Document]) -> None:
+    table_name = "test_singlestoredb_filter_by_text_search"
+    drop(table_name)
+    embeddings = IncrementalEmbeddings()
+    docsearch = SingleStoreDB.from_documents(
+        snow_rain_docs,
+        embeddings,
+        table_name=table_name,
+        use_full_text_search=True,
+        use_vector_index=True,
+        vector_size=2,
+        host=TEST_SINGLESTOREDB_URL,
+    )
+    output = docsearch.similarity_search(
+        "rainstorm in parched desert",
+        k=1,
+        search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_TEXT,
+        filter_threshold=0,
+    )
+    assert len(output) == 1
+    assert (
+        "In the parched desert, a sudden rainstorm brought relief"
+        in output[0].page_content
+    )
+    drop(table_name)
+
+
+@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
+def test_singlestoredb_filter_by_vector_search1(snow_rain_docs: List[Document]) -> None:
+    table_name = "test_singlestoredb_filter_by_vector_search1"
+    drop(table_name)
+    embeddings = IncrementalEmbeddings()
+    docsearch = SingleStoreDB.from_documents(
+        snow_rain_docs,
+        embeddings,
+        table_name=table_name,
+        use_full_text_search=True,
+        use_vector_index=True,
+        vector_size=2,
+        host=TEST_SINGLESTOREDB_URL,
+    )
+    output = docsearch.similarity_search(
+        "rainstorm in parched desert, rain",
+        k=1,
+        filter={"category": "rain"},
+        search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,
+        filter_threshold=-0.2,
+    )
+    assert len(output) == 1
+    assert (
+        "High in the mountains, the rain transformed into a delicate"
+        in output[0].page_content
+    )
+    drop(table_name)
+
+
+@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
+def test_singlestoredb_filter_by_vector_search2(snow_rain_docs: List[Document]) -> None:
+    table_name = "test_singlestoredb_filter_by_vector_search2"
+    drop(table_name)
+    embeddings = IncrementalEmbeddings()
+    docsearch = SingleStoreDB.from_documents(
+        snow_rain_docs,
+        embeddings,
+        distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,
+        table_name=table_name,
+        use_full_text_search=True,
+        use_vector_index=True,
+        vector_size=2,
+        host=TEST_SINGLESTOREDB_URL,
+    )
+    output = docsearch.similarity_search(
+        "rainstorm in parched desert, rain",
+        k=1,
+        filter={"group": "a"},
+        search_strategy=SingleStoreDB.SearchStrategy.FILTER_BY_VECTOR,
+        filter_threshold=1.5,
+    )
+    assert len(output) == 1
+    assert (
+        "Amidst the bustling cityscape, the rain fell relentlessly"
+        in output[0].page_content
+    )
+    drop(table_name)
+
+
+@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
+def test_singlestoredb_weighted_sum_search_unsupported_strategy(
+    snow_rain_docs: List[Document],
+) -> None:
+    """WEIGHTED_SUM search on a EUCLIDEAN_DISTANCE store must raise ValueError."""
+    table_name = "test_singlestoredb_waighted_sum_search_unsupported_strategy"
+    drop(table_name)
+    embeddings = IncrementalEmbeddings()
+    docsearch = SingleStoreDB.from_documents(
+        snow_rain_docs,
+        embeddings,
+        table_name=table_name,
+        use_full_text_search=True,
+        use_vector_index=True,
+        vector_size=2,
+        host=TEST_SINGLESTOREDB_URL,
+        distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,
+    )
+    # pytest.raises (unlike try/except) fails the test if no error is raised.
+    with pytest.raises(ValueError, match="Search strategy WEIGHTED_SUM is not"):
+        docsearch.similarity_search(
+            "rainstorm in parched desert, rain",
+            k=1,
+            search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,
+        )
+    drop(table_name)
+
+
+@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
+def test_singlestoredb_weighted_sum_search(snow_rain_docs: List[Document]) -> None:
+    table_name = "test_singlestoredb_waighted_sum_search"
+    drop(table_name)
+    embeddings = IncrementalEmbeddings()
+    docsearch = SingleStoreDB.from_documents(
+        snow_rain_docs,
+        embeddings,
+        table_name=table_name,
+        use_full_text_search=True,
+        use_vector_index=True,
+        vector_size=2,
+        host=TEST_SINGLESTOREDB_URL,
+    )
+    output = docsearch.similarity_search(
+        "rainstorm in parched desert, rain",
+        k=1,
+        search_strategy=SingleStoreDB.SearchStrategy.WEIGHTED_SUM,
+        filter={"category": "snow"},
+    )
+    assert len(output) == 1
+    assert (
+        "Atop the rugged peaks, snow fell with an unyielding" in output[0].page_content
+    )
+    drop(table_name)