Mirror of https://github.com/hwchase17/langchain.git (synced 2025-09-05 04:55:14 +00:00)
fixed similarity_search_with_score to really use a score
updated unit test with a test for score threshold; updated demo notebook
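For context, a minimal usage sketch of the fixed method, based on the signature and the integration test in the diff below. The store construction and the concrete query/threshold values are illustrative assumptions; what the commit itself establishes is the (Document, score) return shape and the optional score_threshold filter.

from langchain.vectorstores import Vectara

# Assumes Vectara credentials are available; the langchain client can read
# them from VECTARA_CUSTOMER_ID / VECTARA_CORPUS_ID / VECTARA_API_KEY.
docsearch = Vectara()

# After this fix, each result is a (Document, score) tuple, and results whose
# score does not exceed score_threshold are dropped.
results = docsearch.similarity_search_with_score(
    "large language model",  # example query, mirroring the test below
    k=2,
    score_threshold=0.1,     # illustrative threshold, same value as the test
    n_sentence_context=0,
)
for doc, score in results:
    print(f"{score:.3f}  {doc.page_content}")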
@@ -245,6 +245,7 @@ class Vectara(VectorStore):
         k: int = 5,
         lambda_val: float = 0.025,
         filter: Optional[str] = None,
+        score_threshold: Optional[float] = None,
         n_sentence_context: int = 2,
         **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
@@ -258,6 +259,8 @@ class Vectara(VectorStore):
             filter can be "doc.rating > 3.0 and part.lang = 'deu'"} see
             https://docs.vectara.com/docs/search-apis/sql/filter-overview
             for more details.
+            score_threshold: minimal score threshold for the result.
+                If defined, results with score less than this value will be filtered out.
             n_sentence_context: number of sentences before/after the matching segment
                 to add, defaults to 2
 
@@ -305,7 +308,10 @@ class Vectara(VectorStore):
 
         result = response.json()
 
-        responses = result["responseSet"][0]["response"]
+        if score_threshold:
+            responses = [r for r in result["responseSet"][0]["response"] if r["score"] > score_threshold]
+        else:
+            responses = result["responseSet"][0]["response"]
         documents = result["responseSet"][0]["document"]
 
         metadatas = []
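To make the new branch easy to exercise in isolation, here is a self-contained sketch of the same filtering logic over a hand-made payload shaped like the responseSet structure the code reads. The field names come from the diff above; the sample texts and scores are invented. Note that the guard is a plain truthiness check, so passing 0.0 disables filtering just like None.

from typing import Any, Dict, List, Optional


def filter_responses(
    result: Dict[str, Any], score_threshold: Optional[float]
) -> List[Dict[str, Any]]:
    # Same branch structure as the hunk above: keep a response only if its
    # score strictly exceeds the threshold, when a threshold was given.
    if score_threshold:
        return [
            r
            for r in result["responseSet"][0]["response"]
            if r["score"] > score_threshold
        ]
    return result["responseSet"][0]["response"]


# Hand-made payload with just the fields the code reads; texts and scores
# are invented for illustration.
fake_result = {
    "responseSet": [
        {
            "response": [
                {"text": "large language model", "score": 0.42},
                {"text": "loosely related snippet", "score": 0.03},
            ],
            "document": [],
        }
    ]
}

assert len(filter_responses(fake_result, 0.1)) == 1    # low-scoring hit dropped
assert len(filter_responses(fake_result, None)) == 2   # no threshold, keep all
assert len(filter_responses(fake_result, 0.0)) == 2    # 0.0 is falsy, so no filtering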
@@ -316,7 +322,7 @@ class Vectara(VectorStore):
             md.update(doc_md)
             metadatas.append(md)
 
-        docs = [
+        docs_with_score = [
             (
                 Document(
                     page_content=x["text"],
@@ -327,7 +333,7 @@ class Vectara(VectorStore):
             for x, md in zip(responses, metadatas)
         ]
 
-        return docs
+        return docs_with_score
 
     def similarity_search(
         self,
@@ -358,6 +364,7 @@ class Vectara(VectorStore):
             k=k,
             lambda_val=lambda_val,
             filter=filter,
+            score_threshold=None,
             n_sentence_context=n_sentence_context,
             **kwargs,
         )
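The hunk above is the only change to the plain similarity_search path: it now passes score_threshold=None explicitly, so no filtering happens there and its behavior is unchanged. Below is a hypothetical, condensed sketch of that delegation pattern; the helper name and the score-dropping step are assumptions, since the wrapper's surrounding code is not shown in this diff.

from typing import Any, List
from langchain.schema import Document
from langchain.vectorstores import Vectara


def similarity_search_without_scores(
    store: Vectara, query: str, k: int = 5, **kwargs: Any
) -> List[Document]:
    # Hypothetical helper mirroring the delegation: request scored results,
    # pass score_threshold=None so nothing is filtered, then drop the scores.
    # Only the score_threshold=None argument comes from this commit.
    docs_and_scores = store.similarity_search_with_score(
        query,
        k=k,
        score_threshold=None,
        **kwargs,
    )
    return [doc for doc, _score in docs_and_scores]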
@@ -68,6 +68,19 @@ def test_vectara_add_documents() -> None:
     assert output2[0].page_content == "retrieval augmented generation"
     assert output2[0].metadata["abbr"] == "rag"
 
+    # test without filter but with similarity score
+    # this is similar to the first test, but given the score threshold
+    # we only get one result
+    output3 = docsearch.similarity_search_with_score(
+        "large language model",
+        k=2,
+        score_threshold=0.1,
+        n_sentence_context=0,
+    )
+    assert len(output3) == 1
+    assert output3[0][0].page_content == "large language model"
+    assert output3[0][0].metadata["abbr"] == "llm"
+
     for doc_id in doc_ids:
         docsearch._delete_doc(doc_id)
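A short note on the double indexing in the new assertions: each element of output3 is a (Document, score) tuple, so output3[0][0] is the top Document and output3[0][1] is its score. A tiny, self-contained illustration (the 0.73 score is invented):

from langchain.schema import Document

# Roughly the shape of output3[0] in the test above (score value invented).
example_entry = (
    Document(page_content="large language model", metadata={"abbr": "llm"}),
    0.73,
)
doc, score = example_entry
assert doc.metadata["abbr"] == "llm"
assert score > 0.1  # anything not exceeding score_threshold=0.1 was dropped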
|