diff --git a/libs/community/langchain_community/retrievers/bm25.py b/libs/community/langchain_community/retrievers/bm25.py index c0e0b248313..0ebaa2c0cd2 100644 --- a/libs/community/langchain_community/retrievers/bm25.py +++ b/libs/community/langchain_community/retrievers/bm25.py @@ -4,6 +4,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional from langchain_core.callbacks import CallbackManagerForRetrieverRun from langchain_core.documents import Document +from langchain_core.pydantic_v1 import Field from langchain_core.retrievers import BaseRetriever @@ -16,7 +17,7 @@ class BM25Retriever(BaseRetriever): vectorizer: Any """ BM25 vectorizer.""" - docs: List[Document] + docs: List[Document] = Field(repr=False) """ List of documents.""" k: int = 4 """ Number of documents to return.""" diff --git a/libs/community/tests/unit_tests/retrievers/test_bm25.py b/libs/community/tests/unit_tests/retrievers/test_bm25.py index d36f6dae15a..ef40b25ba7d 100644 --- a/libs/community/tests/unit_tests/retrievers/test_bm25.py +++ b/libs/community/tests/unit_tests/retrievers/test_bm25.py @@ -32,3 +32,14 @@ def test_from_documents() -> None: bm25_retriever = BM25Retriever.from_documents(documents=input_docs) assert len(bm25_retriever.docs) == 3 assert bm25_retriever.vectorizer.doc_len == [4, 5, 4] + + +@pytest.mark.requires("rank_bm25") +def test_repr() -> None: + input_docs = [ + Document(page_content="I have a pen."), + Document(page_content="Do you have a pen?"), + Document(page_content="I have a bag."), + ] + bm25_retriever = BM25Retriever.from_documents(documents=input_docs) + assert "I have a pen" not in repr(bm25_retriever)