From f3601b0aaff4a7e9a0945a4e7bc784e44f84a481 Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Wed, 17 Jan 2024 00:00:55 -0800 Subject: [PATCH] Community[Patch] Remove docs from bm25 repr (#16110) Resolves: https://github.com/langchain-ai/langsmith-sdk/issues/356 --- libs/community/langchain_community/retrievers/bm25.py | 3 ++- .../tests/unit_tests/retrievers/test_bm25.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/retrievers/bm25.py b/libs/community/langchain_community/retrievers/bm25.py index c0e0b248313..0ebaa2c0cd2 100644 --- a/libs/community/langchain_community/retrievers/bm25.py +++ b/libs/community/langchain_community/retrievers/bm25.py @@ -4,6 +4,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional from langchain_core.callbacks import CallbackManagerForRetrieverRun from langchain_core.documents import Document +from langchain_core.pydantic_v1 import Field from langchain_core.retrievers import BaseRetriever @@ -16,7 +17,7 @@ class BM25Retriever(BaseRetriever): vectorizer: Any """ BM25 vectorizer.""" - docs: List[Document] + docs: List[Document] = Field(repr=False) """ List of documents.""" k: int = 4 """ Number of documents to return.""" diff --git a/libs/community/tests/unit_tests/retrievers/test_bm25.py b/libs/community/tests/unit_tests/retrievers/test_bm25.py index d36f6dae15a..ef40b25ba7d 100644 --- a/libs/community/tests/unit_tests/retrievers/test_bm25.py +++ b/libs/community/tests/unit_tests/retrievers/test_bm25.py @@ -32,3 +32,14 @@ def test_from_documents() -> None: bm25_retriever = BM25Retriever.from_documents(documents=input_docs) assert len(bm25_retriever.docs) == 3 assert bm25_retriever.vectorizer.doc_len == [4, 5, 4] + + +@pytest.mark.requires("rank_bm25") +def test_repr() -> None: + input_docs = [ + Document(page_content="I have a pen."), + Document(page_content="Do you have a pen?"), + Document(page_content="I 
have a bag."), + ] + bm25_retriever = BM25Retriever.from_documents(documents=input_docs) + assert "I have a pen" not in repr(bm25_retriever)