Harrison/tfidf parameters (#3481)

Co-authored-by: pao <go5kuramubon@gmail.com>
Co-authored-by: KyoHattori <kyo.hattori@abejainc.com>
This commit is contained in:
Harrison Chase
2023-04-24 22:19:58 -07:00
committed by GitHub
parent eda69b13f3
commit 7257f9e015
2 changed files with 26 additions and 3 deletions

View File

@@ -0,0 +1,17 @@
from langchain.retrievers.tfidf import TFIDFRetriever
def test_from_texts() -> None:
    """Check that a retriever built from raw texts stores every document.

    Three short sentences are indexed with the retriever's default settings;
    the test then verifies the number of stored documents and the shape of
    the dense TF-IDF matrix (one row per input text).
    """
    corpus = ["I have a pen.", "Do you have a pen?", "I have a bag."]
    retriever = TFIDFRetriever.from_texts(texts=corpus)

    # One stored document per input text.
    assert len(retriever.docs) == 3

    # 3 rows (texts) x 5 columns; presumably the columns are the vocabulary
    # terms kept by the default vectorizer settings -- confirm against the
    # vectorizer's tokenization rules.
    dense_matrix = retriever.tfidf_array.toarray()
    assert dense_matrix.shape == (3, 5)
def test_from_texts_with_tfidf_params() -> None:
    """Check that ``tfidf_params`` passed to ``from_texts`` affects the matrix.

    The same three sentences are indexed with ``min_df`` set to 2, and the
    resulting dense TF-IDF matrix is expected to have only two columns.
    """
    corpus = ["I have a pen.", "Do you have a pen?", "I have a bag."]
    vectorizer_options = {"min_df": 2}
    retriever = TFIDFRetriever.from_texts(
        texts=corpus,
        tfidf_params=vectorizer_options,
    )

    # Only words occurring in more than one text should be counted
    # (have, pen), leaving two columns for the three texts.
    dense_matrix = retriever.tfidf_array.toarray()
    assert dense_matrix.shape == (3, 2)