community[minor]: Vectara Integration Update - Streaming, FCS, Chat, updates to documentation and example notebooks (#21334)

Thank you for contributing to LangChain!

**Description:** Update the Vectara / LangChain integration to support new Vectara capabilities:
- Full RAG implemented as a Runnable with as_rag() (see the usage sketch below)
- Vectara chat supported with as_chat()
- Both support streaming responses
- Updated documentation and example notebook to reflect all the changes
- Updated Vectara templates
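
For reference, a minimal usage sketch of the new RAG runnable. This assumes the VECTARA_CUSTOMER_ID, VECTARA_CORPUS_ID and VECTARA_API_KEY environment variables are set; the query string and config values are illustrative:

```python
from langchain_community.vectorstores.vectara import (
    RerankConfig,
    SummaryConfig,
    Vectara,
    VectaraQueryConfig,
)

vectara = Vectara()  # credentials are read from the VECTARA_* env vars
config = VectaraQueryConfig(
    k=10,
    rerank_config=RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2),
    summary_config=SummaryConfig(is_enabled=True),
)

rag = vectara.as_rag(config)
res = rag.invoke("What is RAG?")
print(res["answer"])  # also carries "question", "context" and, when available, "fcs"

for chunk in rag.stream("What is RAG?"):  # streaming yields incremental chunks
    if "answer" in chunk:
        print(chunk["answer"], end="")
```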

**Twitter handle:** ofermend

**Add tests and docs:** no new tests or docs, but updated both existing tests and existing docs.
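
A similar sketch for the chat runnable, which keeps the Vectara conversation id across turns (same environment assumptions as above; the questions are illustrative):

```python
from langchain_community.vectorstores.vectara import (
    SummaryConfig,
    Vectara,
    VectaraQueryConfig,
)

vectara = Vectara()  # credentials from VECTARA_* env vars
config = VectaraQueryConfig(summary_config=SummaryConfig(is_enabled=True))
chat = vectara.as_chat(config)

first = chat.invoke("What is a transformer model?")
print(first["answer"])

# The conversation id returned by Vectara is stored on the runnable,
# so the follow-up question is answered in the context of the first turn.
followup = chat.invoke("How is such a model trained?")
print(followup["answer"])
```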
Ofer Mendelevitch
2024-06-04 12:57:28 -07:00
committed by GitHub
parent cb183a9bf1
commit ad502e8d50
14 changed files with 1103 additions and 1599 deletions

View File

@@ -3,18 +3,25 @@ from __future__ import annotations
import json
import logging
import os
import warnings
from dataclasses import dataclass, field
from hashlib import md5
from typing import Any, Iterable, List, Optional, Tuple, Type
from typing import Any, Iterable, Iterator, List, Optional, Tuple, Type
import requests
from langchain_core.callbacks.manager import (
CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import Field
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_core.vectorstores import VectorStore, VectorStoreRetriever
logger = logging.getLogger(__name__)
MMR_RERANKER_ID = 272725718
RERANKER_MULTILINGUAL_V1_ID = 272725719
@dataclass
class SummaryConfig:
@@ -31,11 +38,13 @@ class SummaryConfig:
max_results: int = 7
response_lang: str = "eng"
prompt_name: str = "vectara-summary-ext-v1.2.0"
stream: bool = False
@dataclass
class MMRConfig:
"""Configuration for Maximal Marginal Relevance (MMR) search.
This will soon be deprecated in favor of RerankConfig.
is_enabled: True if MMR is enabled, False otherwise
mmr_k: number of results to fetch for MMR, defaults to 50
@@ -53,6 +62,26 @@ class MMRConfig:
diversity_bias: float = 0.3
@dataclass
class RerankConfig:
"""Configuration for Reranker.
reranker: "mmr", "rerank_multilingual_v1" or "none"
rerank_k: number of results to fetch before reranking, defaults to 50
mmr_diversity_bias: for MMR only - a number between 0 and 1 that determines
the degree of diversity among the results with 0 corresponding
to minimum diversity and 1 to maximum diversity.
Defaults to 0.3.
Note: mmr_diversity_bias is equivalent to 1 - lambda_mult,
where lambda_mult is the value often used in max_marginal_relevance_search().
We chose mmr_diversity_bias since we believe it's more intuitive for users.
"""
reranker: str = "none"
rerank_k: int = 50
mmr_diversity_bias: float = 0.3
@dataclass
class VectaraQueryConfig:
"""Configuration for Vectara query.
@@ -66,9 +95,11 @@ class VectaraQueryConfig:
score_threshold: minimal score threshold for the result.
If defined, results with score less than this value will be
filtered out.
n_sentence_context: number of sentences before/after the matching segment
n_sentence_before: number of sentences before the matching segment
to add, defaults to 2
mmr_config: MMRConfig configuration dataclass
n_sentence_after: number of sentences after the matching segment
to add, defaults to 2
rerank_config: RerankConfig configuration dataclass
summary_config: SummaryConfig configuration dataclass
"""
@@ -76,10 +107,63 @@ class VectaraQueryConfig:
lambda_val: float = 0.0
filter: str = ""
score_threshold: Optional[float] = None
n_sentence_context: int = 2
mmr_config: MMRConfig = field(default_factory=MMRConfig)
n_sentence_before: int = 2
n_sentence_after: int = 2
rerank_config: RerankConfig = field(default_factory=RerankConfig)
summary_config: SummaryConfig = field(default_factory=SummaryConfig)
def __init__(
self,
k: int = 10,
lambda_val: float = 0.0,
filter: str = "",
score_threshold: Optional[float] = None,
n_sentence_before: int = 2,
n_sentence_after: int = 2,
n_sentence_context: Optional[int] = None,
mmr_config: Optional[MMRConfig] = None,
summary_config: Optional[SummaryConfig] = None,
rerank_config: Optional[RerankConfig] = None,
):
self.k = k
self.lambda_val = lambda_val
self.filter = filter
self.score_threshold = score_threshold
if summary_config:
self.summary_config = summary_config
else:
self.summary_config = SummaryConfig()
# handle n_sentence_context for backward compatibility
if n_sentence_context is not None:
self.n_sentence_before = n_sentence_context
self.n_sentence_after = n_sentence_context
warnings.warn(
"n_sentence_context is deprecated. "
"Please use n_sentence_before and n_sentence_after instead",
DeprecationWarning,
)
else:
self.n_sentence_before = n_sentence_before
self.n_sentence_after = n_sentence_after
# handle mmr_config for backward compatibility
if rerank_config:
self.rerank_config = rerank_config
elif mmr_config:
self.rerank_config = RerankConfig(
reranker="mmr",
rerank_k=mmr_config.mmr_k,
mmr_diversity_bias=mmr_config.diversity_bias,
)
warnings.warn(
"MMRConfig is deprecated. Please use RerankConfig instead.",
DeprecationWarning,
)
else:
self.rerank_config = RerankConfig()
class Vectara(VectorStore):
"""`Vectara API` vector store.
@@ -150,9 +234,7 @@ class Vectara(VectorStore):
Delete a document from the Vectara corpus.
Args:
url (str): URL of the page to delete.
doc_id (str): ID of the document to delete.
Returns:
bool: True if deletion was successful, False otherwise.
"""
@@ -207,6 +289,21 @@ class Vectara(VectorStore):
else:
return "E_SUCCEEDED"
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
"""Delete by vector ID or other criteria.
Args:
ids: List of ids to delete.
Returns:
Optional[bool]: True if deletion is successful,
False otherwise, None if not implemented.
"""
if ids:
success = [self._delete_doc(id) for id in ids]
return all(success)
else:
return True
def add_files(
self,
files_list: Iterable[str],
@@ -317,6 +414,84 @@ class Vectara(VectorStore):
)
return [doc_id]
def _get_query_body(
self,
query: str,
config: VectaraQueryConfig,
chat: Optional[bool] = False,
chat_conv_id: Optional[str] = None,
**kwargs: Any,
) -> dict:
"""Build the body for the API
Args:
query: Text to look up documents similar to.
config: VectaraQueryConfig object
Returns:
A dictionary with the body of the query
"""
if isinstance(config.rerank_config, dict):
config.rerank_config = RerankConfig(**config.rerank_config)
if isinstance(config.summary_config, dict):
config.summary_config = SummaryConfig(**config.summary_config)
body = {
"query": [
{
"query": query,
"start": 0,
"numResults": (
config.rerank_config.rerank_k
if (
config.rerank_config.reranker
in ["mmr", "rerank_multilingual_v1"]
)
else config.k
),
"contextConfig": {
"sentencesBefore": config.n_sentence_before,
"sentencesAfter": config.n_sentence_after,
},
"corpusKey": [
{
"corpusId": self._vectara_corpus_id,
"metadataFilter": config.filter,
}
],
}
]
}
if config.lambda_val > 0:
body["query"][0]["corpusKey"][0]["lexicalInterpolationConfig"] = { # type: ignore
"lambda": config.lambda_val
}
if config.rerank_config.reranker == "mmr":
body["query"][0]["rerankingConfig"] = {
"rerankerId": MMR_RERANKER_ID,
"mmrConfig": {"diversityBias": config.rerank_config.mmr_diversity_bias},
}
elif config.rerank_config.reranker == "rerank_multilingual_v1":
body["query"][0]["rerankingConfig"] = {
"rerankerId": RERANKER_MULTILINGUAL_V1_ID,
}
if config.summary_config.is_enabled:
body["query"][0]["summary"] = [
{
"maxSummarizedResults": config.summary_config.max_results,
"responseLang": config.summary_config.response_lang,
"summarizerPromptName": config.summary_config.prompt_name,
}
]
if chat:
body["query"][0]["summary"][0]["chat"] = { # type: ignore
"store": True,
"conversationId": chat_conv_id,
}
return body
def vectara_query(
self,
query: str,
@@ -332,54 +507,11 @@ class Vectara(VectorStore):
A list of k Documents matching the given query
If summary is enabled, the last document is the summary text with 'summary'=True
"""
if isinstance(config.mmr_config, dict):
config.mmr_config = MMRConfig(**config.mmr_config)
if isinstance(config.summary_config, dict):
config.summary_config = SummaryConfig(**config.summary_config)
data = {
"query": [
{
"query": query,
"start": 0,
"numResults": (
config.mmr_config.mmr_k
if config.mmr_config.is_enabled
else config.k
),
"contextConfig": {
"sentencesBefore": config.n_sentence_context,
"sentencesAfter": config.n_sentence_context,
},
"corpusKey": [
{
"customerId": self._vectara_customer_id,
"corpusId": self._vectara_corpus_id,
"metadataFilter": config.filter,
"lexicalInterpolationConfig": {"lambda": config.lambda_val},
}
],
}
]
}
if config.mmr_config.is_enabled:
data["query"][0]["rerankingConfig"] = {
"rerankerId": 272725718,
"mmrConfig": {"diversityBias": config.mmr_config.diversity_bias},
}
if config.summary_config.is_enabled:
data["query"][0]["summary"] = [
{
"maxSummarizedResults": config.summary_config.max_results,
"responseLang": config.summary_config.response_lang,
"summarizerPromptName": config.summary_config.prompt_name,
}
]
body = self._get_query_body(query, config, **kwargs)
response = self._session.post(
headers=self._get_post_headers(),
url="https://api.vectara.io/v1/query",
data=json.dumps(data),
data=json.dumps(body),
timeout=self.vectara_api_timeout,
)
@@ -389,7 +521,7 @@ class Vectara(VectorStore):
f"(code {response.status_code}, reason {response.reason}, details "
f"{response.text})",
)
return [], "" # type: ignore[return-value]
return []
result = response.json()
@@ -424,14 +556,19 @@ class Vectara(VectorStore):
for x, md in zip(responses, metadatas)
]
if config.mmr_config.is_enabled:
if config.rerank_config.reranker in ["mmr", "rerank_multilingual_v1"]:
res = res[: config.k]
if config.summary_config.is_enabled:
summary = result["responseSet"][0]["summary"][0]["text"]
fcs = result["responseSet"][0]["summary"][0]["factualConsistency"]["score"]
res.append(
(Document(page_content=summary, metadata={"summary": True}), 0.0)
(
Document(
page_content=summary, metadata={"summary": True, "fcs": fcs}
),
0.0,
)
)
return res
def similarity_search_with_score(
@@ -444,12 +581,15 @@ class Vectara(VectorStore):
Args:
query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 10.
any other querying variable in VectaraQueryConfig like:
- lambda_val: lexical match parameter for hybrid search.
- filter: filter string
- score_threshold: minimal score threshold for the result.
- n_sentence_context: number of sentences before/after the matching segment
- mmr_config: optional configuration for MMR (see MMRConfig dataclass)
- n_sentence_before: number of sentences before the matching segment
- n_sentence_after: number of sentences after the matching segment
- rerank_config: optional configuration for Reranking
(see RerankConfig dataclass)
- summary_config: optional configuration for summary
(see SummaryConfig dataclass)
Returns:
@@ -503,8 +643,8 @@ class Vectara(VectorStore):
Returns:
List of Documents selected by maximal marginal relevance.
"""
kwargs["mmr_config"] = MMRConfig(
is_enabled=True, mmr_k=fetch_k, diversity_bias=1 - lambda_mult
kwargs["rerank_config"] = RerankConfig(
reranker="mmr", rerank_k=fetch_k, mmr_diversity_bias=1 - lambda_mult
)
return self.similarity_search(query, **kwargs)
@@ -567,42 +707,188 @@ class Vectara(VectorStore):
vectara.add_files(files, metadatas)
return vectara
def as_rag(self, config: VectaraQueryConfig) -> VectaraRAG:
"""Return a Vectara RAG runnable."""
return VectaraRAG(self, config)
def as_chat(self, config: VectaraQueryConfig) -> VectaraRAG:
"""Return a Vectara RAG runnable for chat."""
return VectaraRAG(self, config, chat=True)
def as_retriever(self, **kwargs: Any) -> VectaraRetriever:
"""return a retriever object."""
return VectaraRetriever(
vectorstore=self, config=kwargs.get("config", VectaraQueryConfig())
)
class VectaraRetriever(VectorStoreRetriever):
"""Retriever for `Vectara`."""
"""Vectara Retriever class."""
vectorstore: Vectara
"""Vectara vectorstore."""
search_kwargs: dict = Field(
default_factory=lambda: {
"lambda_val": 0.0,
"k": 5,
"filter": "",
"n_sentence_context": "2",
"summary_config": SummaryConfig(),
}
)
"""VectorStore to use for retrieval."""
"""Search params.
k: Number of Documents to return. Defaults to 5.
lambda_val: lexical match parameter for hybrid search.
filter: Dictionary of argument(s) to filter on metadata. For example a
filter can be "doc.rating > 3.0 and part.lang = 'deu'"} see
https://docs.vectara.com/docs/search-apis/sql/filter-overview
for more details.
n_sentence_context: number of sentences before/after the matching segment to add
"""
config: VectaraQueryConfig
"""Configuration for this retriever."""
def add_texts(
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
docs_and_scores = self.vectorstore.vectara_query(query, self.config)
return [doc for doc, _ in docs_and_scores]
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
"""Add documents to vectorstore."""
return self.vectorstore.add_documents(documents, **kwargs)
class VectaraRAG(Runnable):
def __init__(
self, vectara: Vectara, config: VectaraQueryConfig, chat: bool = False
):
self.vectara = vectara
self.config = config
self.chat = chat
self.conv_id = None
def stream(
self,
texts: List[str],
metadatas: Optional[List[dict]] = None,
doc_metadata: Optional[dict] = None,
) -> None:
"""Add text to the Vectara vectorstore.
input: str,
config: Optional[RunnableConfig] = None,
**kwargs: Any,
) -> Iterator[dict]:
"""get streaming output from Vectara RAG
Args:
texts (List[str]): The text
metadatas (List[dict]): Metadata dicts, must line up with existing store
query: The input query
Yields:
Dictionaries with question, answer, context and, when available, fcs
"""
self.vectorstore.add_texts(texts, metadatas, doc_metadata or {})
body = self.vectara._get_query_body(input, self.config, self.chat, self.conv_id)
response = self.vectara._session.post(
headers=self.vectara._get_post_headers(),
url="https://api.vectara.io/v1/stream-query",
data=json.dumps(body),
timeout=self.vectara.vectara_api_timeout,
stream=True,
)
if response.status_code != 200:
logger.error(
"Query failed %s",
f"(code {response.status_code}, reason {response.reason}, details "
f"{response.text})",
)
return
responses = []
documents = []
yield {"question": input} # First chunk is the question
for line in response.iter_lines():
if line: # filter out keep-alive new lines
data = json.loads(line.decode("utf-8"))
result = data["result"]
response_set = result["responseSet"]
if response_set is None:
summary = result.get("summary", None)
if summary is None:
continue
if len(summary.get("status")) > 0:
logger.error(
f"Summary generation failed with status "
f"{summary.get('status')[0].get('statusDetail')}"
)
continue
# Store conversation ID for chat, if applicable
chat = summary.get("chat", None)
if chat and chat.get("status", None):
st_code = chat["status"]
logger.info(f"Chat query failed with code {st_code}")
if st_code == "RESOURCE_EXHAUSTED":
self.conv_id = None
logger.error(
"Sorry, Vectara chat turns exceeds plan limit."
)
continue
conv_id = chat.get("conversationId", None) if chat else None
if conv_id:
self.conv_id = conv_id
# If FCS is provided, pull it from the JSON response
if summary.get("factualConsistency", None):
fcs = summary.get("factualConsistency", {}).get("score", None)
yield {"fcs": fcs}
continue
# Yield the summary chunk
chunk = str(summary["text"])
yield {"answer": chunk}
else:
if self.config.score_threshold:
responses = [
r
for r in response_set["response"]
if r["score"] > self.config.score_threshold
]
else:
responses = response_set["response"]
documents = response_set["document"]
metadatas = []
for x in responses:
md = {m["name"]: m["value"] for m in x["metadata"]}
doc_num = x["documentIndex"]
doc_md = {
m["name"]: m["value"]
for m in documents[doc_num]["metadata"]
}
if "source" not in doc_md:
doc_md["source"] = "vectara"
md.update(doc_md)
metadatas.append(md)
res = [
(
Document(
page_content=x["text"],
metadata=md,
),
x["score"],
)
for x, md in zip(responses, metadatas)
]
if self.config.rerank_config.reranker in [
"mmr",
"rerank_multilingual_v1",
]:
res = res[: self.config.k]
yield {"context": res}
return
def invoke(
self,
input: str,
config: Optional[RunnableConfig] = None,
) -> dict:
res = {"answer": ""}
for chunk in self.stream(input):
if "context" in chunk:
res["context"] = chunk["context"]
elif "question" in chunk:
res["question"] = chunk["question"]
elif "answer" in chunk:
res["answer"] += chunk["answer"]
elif "fcs" in chunk:
res["fcs"] = chunk["fcs"]
else:
logger.error(f"Unknown chunk type: {chunk}")
return res
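
To make the chunk protocol implemented by stream() above concrete, a hedged consumer sketch; the function name is illustrative, and the chunk keys match the yields in the code above:

```python
from langchain_community.vectorstores.vectara import VectaraRAG


def consume(rag: VectaraRAG, query: str) -> None:
    """Walk the chunk stream emitted by VectaraRAG.stream()."""
    for chunk in rag.stream(query):
        if "question" in chunk:  # always emitted first
            print("Q:", chunk["question"])
        elif "context" in chunk:  # list of (Document, score) tuples
            print(f"retrieved {len(chunk['context'])} passages")
        elif "answer" in chunk:  # incremental summary text
            print(chunk["answer"], end="")
        elif "fcs" in chunk:  # factual consistency score, when returned
            print("\nFCS:", chunk["fcs"])
```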

View File

@@ -4,19 +4,25 @@ import urllib.request
import pytest
from langchain_core.documents import Document
# from langchain_community.vectorstores.vectara import Vectara, SummaryConfig
from langchain_community.vectorstores.vectara import SummaryConfig, Vectara
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
from langchain_community.vectorstores import Vectara
from langchain_community.vectorstores.vectara import (
MMRConfig,
RerankConfig,
SummaryConfig,
VectaraQueryConfig,
)
#
# For this test to run properly, please set up as follows:
# 1. Create a Vectara account: sign up at https://console.vectara.com/signup
# 1. Create a Vectara account: sign up at https://www.vectara.com/integrations/langchain
# 2. Create a corpus in your Vectara account, with a filter attribute called "test_num".
# 3. Create an API_KEY for this corpus with permissions for query and indexing
# 4. Setup environment variables:
# VECTARA_API_KEY, VECTARA_CORPUS_ID and VECTARA_CUSTOMER_ID
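#    For example (hypothetical placeholder values):
#      export VECTARA_CUSTOMER_ID="<customer-id>"
#      export VECTARA_CORPUS_ID="<corpus-id>"
#      export VECTARA_API_KEY="<api-key>"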
#
test_prompt_name = "vectara-experimental-summary-ext-2023-12-11-sml"
def get_abbr(s: str) -> str:
words = s.split(" ") # Split the string into words
@@ -50,36 +56,34 @@ def vectara1(): # type: ignore[no-untyped-def]
yield vectara1
# Tear down code
for doc_id in doc_ids:
vectara1._delete_doc(doc_id)
vectara1.delete(doc_ids)
def test_vectara_add_documents(vectara1) -> None: # type: ignore[no-untyped-def]
def test_vectara_add_documents(vectara1: Vectara) -> None: # type: ignore[no-untyped-def]
"""Test add_documents."""
# test without filter
output1 = vectara1.similarity_search(
"large language model",
k=2,
n_sentence_context=0,
n_sentence_before=0,
n_sentence_after=0,
)
assert len(output1) == 2
assert output1[0].page_content == "large language model"
assert output1[0].metadata["abbr"] == "llm"
assert output1[1].page_content == "grounded generation"
assert output1[1].metadata["abbr"] == "gg"
# test with metadata filter (doc level)
# the query does not match the test_num=1 doc directly, so we get its closest match ("rag")
output2 = vectara1.similarity_search(
"large language model",
k=1,
n_sentence_context=0,
n_sentence_before=0,
n_sentence_after=0,
filter="doc.test_num = 1",
)
assert len(output2) == 1
assert output2[0].page_content == "grounded generation"
assert output2[0].metadata["abbr"] == "gg"
assert output2[0].page_content == "retrieval augmented generation"
assert output2[0].metadata["abbr"] == "rag"
# test without filter but with similarity score
# this is similar to the first test, but given the score threshold
@@ -87,19 +91,21 @@ def test_vectara_add_documents(vectara1) -> None: # type: ignore[no-untyped-def
output3 = vectara1.similarity_search_with_score(
"large language model",
k=2,
score_threshold=0.8,
n_sentence_context=0,
score_threshold=0.5,
n_sentence_before=0,
n_sentence_after=0,
)
assert len(output3) == 1
assert len(output3) == 2
assert output3[0][0].page_content == "large language model"
assert output3[0][0].metadata["abbr"] == "llm"
def test_vectara_from_files() -> None:
"""Test end to end construction and search."""
@pytest.fixture(scope="function")
def vectara2(): # type: ignore[no-untyped-def]
# download documents to local storage and then upload as files
# attention paper and deep learning book
vectara2: Vectara = Vectara()
urls = [
(
"https://papers.nips.cc/paper_files/paper/2017/"
@@ -117,50 +123,102 @@ def test_vectara_from_files() -> None:
urllib.request.urlretrieve(url, name)
files_list.append(name)
docsearch: Vectara = Vectara()
doc_ids = docsearch.add_files(
doc_ids = vectara2.add_files(
files_list=files_list,
embedding=FakeEmbeddings(),
metadatas=[{"url": url, "test_num": "2"} for url in urls],
)
# finally do a similarity search to see if all works okay
output = docsearch.similarity_search(
yield vectara2
# Tear down code
vectara2.delete(doc_ids)
def test_vectara_from_files(vectara2: Vectara) -> None:
"""test uploading data from files"""
output = vectara2.similarity_search(
"By the commonly adopted machine learning tradition",
k=1,
n_sentence_context=0,
n_sentence_before=0,
n_sentence_after=0,
filter="doc.test_num = 2",
)
assert output[0].page_content == (
"By the commonly adopted machine learning tradition "
"(e.g., Chapter 28 in Murphy, 2012; Deng and Li, 2013), it may be natural "
"to just classify deep learning techniques into deep discriminative models "
"(e.g., DNNs) and deep probabilistic generative models (e.g., DBN, Deep "
"Boltzmann Machine (DBM))."
assert (
"By the commonly adopted machine learning tradition" in output[0].page_content
)
# finally do a similarity search to see if all works okay
output = docsearch.similarity_search(
# another similarity search, this time with n_sentence_before/after = 1
output = vectara2.similarity_search(
"By the commonly adopted machine learning tradition",
k=1,
n_sentence_before=1,
n_sentence_after=1,
filter="doc.test_num = 2",
)
assert "Note the use of" in output[0].page_content
# Test the old n_sentence_context to ensure it's backward compatible
output = vectara2.similarity_search(
"By the commonly adopted machine learning tradition",
k=1,
n_sentence_context=1,
filter="doc.test_num = 2",
)
assert output[0].page_content == (
"""\
Note the use of “hybrid” in 3) above is different from that used sometimes in the literature, \
which for example refers to the hybrid systems for speech recognition feeding the output probabilities of a neural network into an HMM \
(Bengio et al., 1991; Bourlard and Morgan, 1993; Morgan, 2012). \
By the commonly adopted machine learning tradition (e.g., Chapter 28 in Murphy, 2012; Deng and Li, 2013), \
it may be natural to just classify deep learning techniques into deep discriminative models (e.g., DNNs) \
and deep probabilistic generative models (e.g., DBN, Deep Boltzmann Machine (DBM)). \
This classification scheme, however, misses a key insight gained in deep learning research about how generative \
models can greatly improve the training of DNNs and other deep discriminative models via better regularization.\
""" # noqa: E501
assert "Note the use of" in output[0].page_content
def test_vectara_rag_with_reranking(vectara2: Vectara) -> None:
"""Test Vectara reranking."""
query_str = "What is a transformer model?"
# Note: we don't test rerank_multilingual_v1 as it's for Scale only
# Test MMR
summary_config = SummaryConfig(
is_enabled=True,
max_results=7,
response_lang="eng",
prompt_name=test_prompt_name,
)
rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2)
config = VectaraQueryConfig(
k=10,
lambda_val=0.005,
rerank_config=rerank_config,
summary_config=summary_config,
)
for doc_id in doc_ids:
docsearch._delete_doc(doc_id)
rag1 = vectara2.as_rag(config)
response1 = rag1.invoke(query_str)
assert "transformer model" in response1["answer"].lower()
# Test No reranking
summary_config = SummaryConfig(
is_enabled=True,
max_results=7,
response_lang="eng",
prompt_name=test_prompt_name,
)
rerank_config = RerankConfig(reranker="None")
config = VectaraQueryConfig(
k=10,
lambda_val=0.005,
rerank_config=rerank_config,
summary_config=summary_config,
)
rag2 = vectara2.as_rag(config)
response2 = rag2.invoke(query_str)
assert "transformer model" in response2["answer"].lower()
# assert that the page content is different for the top results
# in each reranking
n_results = 10
response1_content = [x[0].page_content for x in response1["context"][:n_results]]
response2_content = [x[0].page_content for x in response2["context"][:n_results]]
assert response1_content != response2_content
@pytest.fixture(scope="function")
@@ -206,21 +264,20 @@ def vectara3(): # type: ignore[no-untyped-def]
yield vectara3
# Tear down code
for doc_id in doc_ids:
vectara3._delete_doc(doc_id)
vectara3.delete(doc_ids)
def test_vectara_mmr(vectara3) -> None: # type: ignore[no-untyped-def]
def test_vectara_with_langchain_mmr(vectara3: Vectara) -> None: # type: ignore[no-untyped-def]
# test max marginal relevance
output1 = vectara3.max_marginal_relevance_search(
"generative AI",
k=2,
fetch_k=6,
lambda_mult=1.0, # no diversity bias
n_sentence_context=0,
n_sentence_before=0,
n_sentence_after=0,
)
assert len(output1) == 2
assert "Generative AI promises to revolutionize how" in output1[0].page_content
assert (
"This is why today we're adding a fundamental capability"
in output1[1].page_content
@@ -231,16 +288,64 @@ def test_vectara_mmr(vectara3) -> None: # type: ignore[no-untyped-def]
k=2,
fetch_k=6,
lambda_mult=0.0, # only diversity bias
n_sentence_context=0,
n_sentence_before=0,
n_sentence_after=0,
)
assert len(output2) == 2
assert "Generative AI promises to revolutionize how" in output2[0].page_content
assert (
"Neural LLM systems are excellent at understanding the context"
in output2[1].page_content
)
def test_vectara_mmr(vectara3: Vectara) -> None: # type: ignore[no-untyped-def]
# test MMR directly with rerank_config
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2)
config = VectaraQueryConfig(
k=10,
lambda_val=0.005,
rerank_config=rerank_config,
summary_config=summary_config,
)
rag = vectara3.as_rag(config)
output1 = rag.invoke("what is generative AI?")["answer"]
assert len(output1) > 0
# test MMR directly with old mmr_config
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
mmr_config = MMRConfig(is_enabled=True, mmr_k=50, diversity_bias=0.2)
config = VectaraQueryConfig(
k=10, lambda_val=0.005, mmr_config=mmr_config, summary_config=summary_config
)
rag = vectara3.as_rag(config)
output2 = rag.invoke("what is generative AI?")["answer"]
assert len(output2) > 0
# test reranking disabled - RerankConfig
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
rerank_config = RerankConfig(reranker="none")
config = VectaraQueryConfig(
k=10,
lambda_val=0.005,
rerank_config=rerank_config,
summary_config=summary_config,
)
rag = vectara3.as_rag(config)
output1 = rag.invoke("what is generative AI?")["answer"]
assert len(output1) > 0
# test with reranking disabled - MMRConfig
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
mmr_config = MMRConfig(is_enabled=False, mmr_k=50, diversity_bias=0.2)
config = VectaraQueryConfig(
k=10, lambda_val=0.005, mmr_config=mmr_config, summary_config=summary_config
)
rag = vectara3.as_rag(config)
output2 = rag.invoke("what is generative AI?")["answer"]
assert len(output2) > 0
def test_vectara_with_summary(vectara3) -> None: # type: ignore[no-untyped-def]
"""Test vectara summary."""
# test summarization
@@ -248,7 +353,12 @@ def test_vectara_with_summary(vectara3) -> None: # type: ignore[no-untyped-def]
output1 = vectara3.similarity_search(
query="what is generative AI?",
k=num_results,
summary_config=SummaryConfig(is_enabled=True, max_results=5),
summary_config=SummaryConfig(
is_enabled=True,
max_results=5,
response_lang="eng",
prompt_name=test_prompt_name,
),
)
assert len(output1) == num_results + 1
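
As a cross-check on the backward-compatibility path exercised in test_vectara_mmr above, a minimal sketch (values are illustrative) of how the deprecated MMRConfig maps onto the new RerankConfig:

```python
import warnings

from langchain_community.vectorstores.vectara import (
    MMRConfig,
    RerankConfig,
    VectaraQueryConfig,
)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    config = VectaraQueryConfig(
        mmr_config=MMRConfig(is_enabled=True, mmr_k=50, diversity_bias=0.2)
    )

# The legacy config is translated into the new RerankConfig,
# and a DeprecationWarning is emitted.
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
assert config.rerank_config == RerankConfig(
    reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2
)
```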

View File

@@ -93,6 +93,7 @@ def test_compatible_vectorstore_documentation() -> None:
"Vald",
"VDMS",
"Vearch",
"Vectara",
"VespaStore",
"VLite",
"Weaviate",