mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-12 00:11:17 +00:00
Harrison/tfidf parameters (#3481)
Co-authored-by: pao <go5kuramubon@gmail.com> Co-authored-by: KyoHattori <kyo.hattori@abejainc.com>
This commit is contained in:
17
tests/integration_tests/retrievers/test_tfidf.py
Normal file
17
tests/integration_tests/retrievers/test_tfidf.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from langchain.retrievers.tfidf import TFIDFRetriever
|
||||
|
||||
|
||||
def test_from_texts() -> None:
    """Build a retriever from three short texts and check its stored state.

    Verifies that one document is kept per input text and that the dense
    form of the TF-IDF matrix has 3 rows and 5 columns.
    """
    corpus = ["I have a pen.", "Do you have a pen?", "I have a bag."]
    retriever = TFIDFRetriever.from_texts(texts=corpus)

    # One stored document per input text.
    doc_count = len(retriever.docs)
    assert doc_count == 3

    # Rows correspond to the three texts; five feature columns are expected.
    dense_matrix = retriever.tfidf_array.toarray()
    assert dense_matrix.shape == (3, 5)
|
||||
|
||||
|
||||
def test_from_texts_with_tfidf_params() -> None:
    """Check that ``tfidf_params`` given to ``from_texts`` shapes the matrix.

    Passing ``{"min_df": 2}`` is expected to shrink the feature set to two
    columns while keeping one row per input text.
    """
    corpus = ["I have a pen.", "Do you have a pen?", "I have a bag."]
    vectorizer_options = {"min_df": 2}
    retriever = TFIDFRetriever.from_texts(
        texts=corpus,
        tfidf_params=vectorizer_options,
    )

    # Only words that occur in at least two of the texts should be kept
    # (have, pen).
    dense_matrix = retriever.tfidf_array.toarray()
    assert dense_matrix.shape == (3, 2)
|
Reference in New Issue
Block a user