community: update Vectara integration (#27869)
Thank you for contributing to LangChain!

- **Description:** Updated the Vectara integration: refreshed the descriptions across all demos and added the UDF (user-defined function) reranker.
- **Issue:** None
- **Dependencies:** None
- **Twitter handle:** @ofermend

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>

Committed by: GitHub
Parent: 14a71a6e77
Commit: d7c39e6dbb
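For context, the new reranker is driven entirely through `RerankConfig`. Below is a minimal usage sketch, assuming an existing Vectara corpus with indexed documents and credentials in the standard environment variables; the query string is illustrative:

```python
from langchain_community.vectorstores.vectara import (
    RerankConfig,
    SummaryConfig,
    Vectara,
    VectaraQueryConfig,
)

# Credentials fall back to the VECTARA_CUSTOMER_ID, VECTARA_CORPUS_ID and
# VECTARA_API_KEY environment variables when not passed explicitly.
vectara = Vectara()

# Fetch 50 candidates and rerank them with the new UDF reranker; the user
# function below simply reorders by the existing score (the same expression
# the integration tests use).
rerank_config = RerankConfig(
    reranker="udf", rerank_k=50, user_function="get('$.score')"
)
config = VectaraQueryConfig(
    k=10,
    rerank_config=rerank_config,
    summary_config=SummaryConfig(is_enabled=True),
)

rag = vectara.as_rag(config)
print(rag.invoke("what is generative AI?")["answer"])
```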
libs/community/langchain_community/vectorstores/vectara.py

```diff
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
 
 MMR_RERANKER_ID = 272725718
 RERANKER_MULTILINGUAL_V1_ID = 272725719
+UDF_RERANKER_ID = 272725722
 
 
 @dataclass
@@ -38,7 +39,7 @@ class SummaryConfig:
     is_enabled: bool = False
     max_results: int = 7
     response_lang: str = "eng"
-    prompt_name: str = "vectara-summary-ext-v1.2.0"
+    prompt_name: str = "vectara-summary-ext-24-05-med-omni"
     stream: bool = False
 
 
@@ -67,7 +68,7 @@ class MMRConfig:
 class RerankConfig:
     """Configuration for Reranker.
 
-    reranker: "mmr", "rerank_multilingual_v1" or "none"
+    reranker: "mmr", "rerank_multilingual_v1", "udf" or "none"
     rerank_k: number of results to fetch before reranking, defaults to 50
     mmr_diversity_bias: for MMR only - a number between 0 and 1 that determines
         the degree of diversity among the results with 0 corresponding
@@ -76,11 +77,13 @@ class RerankConfig:
     Note: mmr_diversity_bias is equivalent 1-lambda_mult
         where lambda_mult is the value often used in max_marginal_relevance_search()
         We chose to use that since we believe it's more intuitive to the user.
+    user_function: for UDF only - the user function to use for reranking.
     """
 
     reranker: str = "none"
     rerank_k: int = 50
     mmr_diversity_bias: float = 0.3
+    user_function: str = ""
 
 
 @dataclass
@@ -445,7 +448,7 @@ class Vectara(VectorStore):
                 config.rerank_config.rerank_k
                 if (
                     config.rerank_config.reranker
-                    in ["mmr", "rerank_multilingual_v1"]
+                    in ["mmr", "udf", "rerank_multilingual_v1"]
                 )
                 else config.k
             ),
@@ -473,6 +476,11 @@ class Vectara(VectorStore):
                 "rerankerId": MMR_RERANKER_ID,
                 "mmrConfig": {"diversityBias": config.rerank_config.mmr_diversity_bias},
             }
+        elif config.rerank_config.reranker == "udf":
+            body["query"][0]["rerankingConfig"] = {
+                "rerankerId": UDF_RERANKER_ID,
+                "userFunction": config.rerank_config.user_function,
+            }
         elif config.rerank_config.reranker == "rerank_multilingual_v1":
             body["query"][0]["rerankingConfig"] = {
                 "rerankerId": RERANKER_MULTILINGUAL_V1_ID,
```
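For illustration, the request fragment the new `udf` branch attaches to the query body looks like the following; the user function value here is a placeholder, not part of the diff:

```python
UDF_RERANKER_ID = 272725722

# Shape of the rerankingConfig the "udf" branch builds from RerankConfig;
# userFunction carries whatever RerankConfig.user_function holds.
reranking_config = {
    "rerankerId": UDF_RERANKER_ID,
    "userFunction": "get('$.score')",  # placeholder expression
}
```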
libs/community/tests/integration_tests/vectorstores/test_vectara.py

```diff
@@ -22,7 +22,7 @@ from langchain_community.vectorstores.vectara import (
 # VECTARA_API_KEY, VECTARA_CORPUS_ID and VECTARA_CUSTOMER_ID
 #
 
-test_prompt_name = "vectara-experimental-summary-ext-2023-12-11-sml"
+test_prompt_name = "vectara-summary-ext-24-05-med-omni"
 
 
 def get_abbr(s: str) -> str:
@@ -299,8 +299,36 @@ def test_vectara_with_langchain_mmr(vectara3: Vectara) -> None:  # type: ignore[
     )
 
 
-def test_vectara_mmr(vectara3: Vectara) -> None:  # type: ignore[no-untyped-def]
-    # test MMR directly with rerank_config
+def test_vectara_rerankers(vectara3: Vectara) -> None:  # type: ignore[no-untyped-def]
+    # test Vectara multi-lingual reranker
     summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
+    rerank_config = RerankConfig(reranker="rerank_multilingual_v1", rerank_k=50)
+    config = VectaraQueryConfig(
+        k=10,
+        lambda_val=0.005,
+        rerank_config=rerank_config,
+        summary_config=summary_config,
+    )
+    rag = vectara3.as_rag(config)
+    output1 = rag.invoke("what is generative AI?")["answer"]
+    assert len(output1) > 0
+
+    # test Vectara udf reranker
+    summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
+    rerank_config = RerankConfig(
+        reranker="udf", rerank_k=50, user_function="get('$.score')"
+    )
+    config = VectaraQueryConfig(
+        k=10,
+        lambda_val=0.005,
+        rerank_config=rerank_config,
+        summary_config=summary_config,
+    )
+    rag = vectara3.as_rag(config)
+    output1 = rag.invoke("what is generative AI?")["answer"]
+    assert len(output1) > 0
+
+    # test Vectara MMR reranker
+    summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang="eng")
     rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2)
     config = VectaraQueryConfig(
```
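The `get('$.score')` expression used in the tests is the identity case: it reranks each result by the score it already has. As a sketch of a more interesting function, assuming Vectara's UDF expression language can read document metadata (the `date` field and the metadata path here are assumptions, not part of this change):

```python
from langchain_community.vectorstores.vectara import RerankConfig

# Hypothetical: rerank by a document metadata field instead of the score.
# The expression syntax and field name are assumptions for illustration.
rerank_config = RerankConfig(
    reranker="udf",
    rerank_k=50,
    user_function="get('$.document_metadata.date')",
)
```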