community: update Vectara integration (#27869)

Thank you for contributing to LangChain!

- **Description:** Updated Vectara integration
- **Issue:** Refreshed the descriptions across all demos and added the UDF
reranker
- **Dependencies:** None
- **Twitter handle:** @ofermend

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Ofer Mendelevitch
2024-11-04 12:40:39 -08:00
committed by GitHub
parent 14a71a6e77
commit d7c39e6dbb
6 changed files with 60 additions and 25 deletions

View File

@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
MMR_RERANKER_ID = 272725718
RERANKER_MULTILINGUAL_V1_ID = 272725719
UDF_RERANKER_ID = 272725722
@dataclass
@@ -38,7 +39,7 @@ class SummaryConfig:
is_enabled: bool = False
max_results: int = 7
response_lang: str = "eng"
prompt_name: str = "vectara-summary-ext-v1.2.0"
prompt_name: str = "vectara-summary-ext-24-05-med-omni"
stream: bool = False
@@ -67,7 +68,7 @@ class MMRConfig:
class RerankConfig:
"""Configuration for Reranker.
reranker: "mmr", "rerank_multilingual_v1" or "none"
reranker: "mmr", "rerank_multilingual_v1", "udf" or "none"
rerank_k: number of results to fetch before reranking, defaults to 50
mmr_diversity_bias: for MMR only - a number between 0 and 1 that determines
the degree of diversity among the results with 0 corresponding
@@ -76,11 +77,13 @@ class RerankConfig:
Note: mmr_diversity_bias is equivalent to 1 - lambda_mult
where lambda_mult is the value often used in max_marginal_relevance_search()
We chose to use that since we believe it's more intuitive to the user.
user_function: for UDF only - the user function to use for reranking.
"""
reranker: str = "none"
rerank_k: int = 50
mmr_diversity_bias: float = 0.3
user_function: str = ""
@dataclass
@@ -445,7 +448,7 @@ class Vectara(VectorStore):
config.rerank_config.rerank_k
if (
config.rerank_config.reranker
in ["mmr", "rerank_multilingual_v1"]
in ["mmr", "udf", "rerank_multilingual_v1"]
)
else config.k
),
@@ -473,6 +476,11 @@ class Vectara(VectorStore):
"rerankerId": MMR_RERANKER_ID,
"mmrConfig": {"diversityBias": config.rerank_config.mmr_diversity_bias},
}
elif config.rerank_config.reranker == "udf":
body["query"][0]["rerankingConfig"] = {
"rerankerId": UDF_RERANKER_ID,
"userFunction": config.rerank_config.user_function,
}
elif config.rerank_config.reranker == "rerank_multilingual_v1":
body["query"][0]["rerankingConfig"] = {
"rerankerId": RERANKER_MULTILINGUAL_V1_ID,

View File

@@ -22,7 +22,7 @@ from langchain_community.vectorstores.vectara import (
# VECTARA_API_KEY, VECTARA_CORPUS_ID and VECTARA_CUSTOMER_ID
#
test_prompt_name = "vectara-experimental-summary-ext-2023-12-11-sml"
test_prompt_name = "vectara-summary-ext-24-05-med-omni"
def get_abbr(s: str) -> str:
@@ -299,8 +299,36 @@ def test_vectara_with_langchain_mmr(vectara3: Vectara) -> None: # type: ignore[
)
def test_vectara_mmr(vectara3: Vectara) -> None: # type: ignore[no-untyped-def]
# test MMR directly with rerank_config
def test_vectara_rerankers(vectara3: Vectara) -> None: # type: ignore[no-untyped-def]
# test Vectara multi-lingual reranker
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
rerank_config = RerankConfig(reranker="rerank_multilingual_v1", rerank_k=50)
config = VectaraQueryConfig(
k=10,
lambda_val=0.005,
rerank_config=rerank_config,
summary_config=summary_config,
)
rag = vectara3.as_rag(config)
output1 = rag.invoke("what is generative AI?")["answer"]
assert len(output1) > 0
# test Vectara udf reranker
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
rerank_config = RerankConfig(
reranker="udf", rerank_k=50, user_function="get('$.score')"
)
config = VectaraQueryConfig(
k=10,
lambda_val=0.005,
rerank_config=rerank_config,
summary_config=summary_config,
)
rag = vectara3.as_rag(config)
output1 = rag.invoke("what is generative AI?")["answer"]
assert len(output1) > 0
# test Vectara MMR reranker
summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2)
config = VectaraQueryConfig(