community: update Vectara integration (#27869)

Thank you for contributing to LangChain! - **Description:** Updated Vectara integration - **Issue:** refresh on descriptions across all demos and added UDF reranker - **Dependencies:** None - **Twitter handle:** @ofermend --------- Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: Erick Friis <erick@langchain.dev>
2025-09-12 12:59:07 +00:00 · 2024-11-04 12:40:39 -08:00
parent 14a71a6e77
commit d7c39e6dbb
6 changed files with 60 additions and 25 deletions
--- a/libs/community/langchain_community/vectorstores/vectara.py
+++ b/libs/community/langchain_community/vectorstores/vectara.py
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)

 MMR_RERANKER_ID = 272725718
 RERANKER_MULTILINGUAL_V1_ID = 272725719
+UDF_RERANKER_ID = 272725722


@dataclass
@@ -38,7 +39,7 @@ class SummaryConfig:
    is_enabled: bool = False
    max_results: int = 7
    response_lang: str = "eng"
-    prompt_name: str = "vectara-summary-ext-v1.2.0"
+    prompt_name: str = "vectara-summary-ext-24-05-med-omni"
    stream: bool = False


@@ -67,7 +68,7 @@ class MMRConfig:
 class RerankConfig:
    """Configuration for Reranker.

-    reranker: "mmr", "rerank_multilingual_v1" or "none"
+    reranker: "mmr", "rerank_multilingual_v1", "udf" or "none"
    rerank_k: number of results to fetch before reranking, defaults to 50
    mmr_diversity_bias: for MMR only - a number between 0 and 1 that determines
        the degree of diversity among the results with 0 corresponding
@@ -76,11 +77,13 @@ class RerankConfig:
        Note: mmr_diversity_bias is equivalent to 1-lambda_mult,
        where lambda_mult is the value often used in max_marginal_relevance_search().
        We chose to use that since we believe it's more intuitive to the user.
+    user_function: for UDF only - the user function to use for reranking.
    """

    reranker: str = "none"
    rerank_k: int = 50
    mmr_diversity_bias: float = 0.3
+    user_function: str = ""


@dataclass
@@ -445,7 +448,7 @@ class Vectara(VectorStore):
                        config.rerank_config.rerank_k
                        if (
                            config.rerank_config.reranker
-                            in ["mmr", "rerank_multilingual_v1"]
+                            in ["mmr", "udf", "rerank_multilingual_v1"]
                        )
                        else config.k
                    ),
@@ -473,6 +476,11 @@ class Vectara(VectorStore):
                "rerankerId": MMR_RERANKER_ID,
                "mmrConfig": {"diversityBias": config.rerank_config.mmr_diversity_bias},
            }
+        elif config.rerank_config.reranker == "udf":
+            body["query"][0]["rerankingConfig"] = {
+                "rerankerId": UDF_RERANKER_ID,
+                "userFunction": config.rerank_config.user_function,
+            }
        elif config.rerank_config.reranker == "rerank_multilingual_v1":
            body["query"][0]["rerankingConfig"] = {
                "rerankerId": RERANKER_MULTILINGUAL_V1_ID,
--- a/libs/community/tests/integration_tests/vectorstores/test_vectara.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_vectara.py
@@ -22,7 +22,7 @@ from langchain_community.vectorstores.vectara import (
 #    VECTARA_API_KEY, VECTARA_CORPUS_ID and VECTARA_CUSTOMER_ID
 #

-test_prompt_name = "vectara-experimental-summary-ext-2023-12-11-sml"
+test_prompt_name = "vectara-summary-ext-24-05-med-omni"


 def get_abbr(s: str) -> str:
@@ -299,8 +299,36 @@ def test_vectara_with_langchain_mmr(vectara3: Vectara) -> None:  # type: ignore[
    )


-def test_vectara_mmr(vectara3: Vectara) -> None:  # type: ignore[no-untyped-def]
-    # test MMR directly with rerank_config
+def test_vectara_rerankers(vectara3: Vectara) -> None:  # type: ignore[no-untyped-def]
+    # test Vectara multi-lingual reranker
+    summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
+    rerank_config = RerankConfig(reranker="rerank_multilingual_v1", rerank_k=50)
+    config = VectaraQueryConfig(
+        k=10,
+        lambda_val=0.005,
+        rerank_config=rerank_config,
+        summary_config=summary_config,
+    )
+    rag = vectara3.as_rag(config)
+    output1 = rag.invoke("what is generative AI?")["answer"]
+    assert len(output1) > 0
+
+    # test Vectara udf reranker
+    summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
+    rerank_config = RerankConfig(
+        reranker="udf", rerank_k=50, user_function="get('$.score')"
+    )
+    config = VectaraQueryConfig(
+        k=10,
+        lambda_val=0.005,
+        rerank_config=rerank_config,
+        summary_config=summary_config,
+    )
+    rag = vectara3.as_rag(config)
+    output1 = rag.invoke("what is generative AI?")["answer"]
+    assert len(output1) > 0
+
+    # test Vectara MMR reranker
    summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
    rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2)
    config = VectaraQueryConfig(