Community[Patch] Remove docs from bm25 repr (#16110)

Resolves: https://github.com/langchain-ai/langsmith-sdk/issues/356
This commit is contained in:
William FH 2024-01-17 00:00:55 -08:00 committed by GitHub
parent c323742f4f
commit f3601b0aaf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 13 additions and 1 deletions

View File

@ -4,6 +4,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional
from langchain_core.callbacks import CallbackManagerForRetrieverRun from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document from langchain_core.documents import Document
from langchain_core.pydantic_v1 import Field
from langchain_core.retrievers import BaseRetriever from langchain_core.retrievers import BaseRetriever
@ -16,7 +17,7 @@ class BM25Retriever(BaseRetriever):
vectorizer: Any vectorizer: Any
""" BM25 vectorizer.""" """ BM25 vectorizer."""
docs: List[Document] docs: List[Document] = Field(repr=False)
""" List of documents.""" """ List of documents."""
k: int = 4 k: int = 4
""" Number of documents to return.""" """ Number of documents to return."""

View File

@ -32,3 +32,14 @@ def test_from_documents() -> None:
bm25_retriever = BM25Retriever.from_documents(documents=input_docs) bm25_retriever = BM25Retriever.from_documents(documents=input_docs)
assert len(bm25_retriever.docs) == 3 assert len(bm25_retriever.docs) == 3
assert bm25_retriever.vectorizer.doc_len == [4, 5, 4] assert bm25_retriever.vectorizer.doc_len == [4, 5, 4]
@pytest.mark.requires("rank_bm25")
def test_repr() -> None:
    """Check that a document's text does not show up in the retriever's repr."""
    sample_texts = ("I have a pen.", "Do you have a pen?", "I have a bag.")
    corpus = [Document(page_content=text) for text in sample_texts]
    retriever = BM25Retriever.from_documents(documents=corpus)
    # The repr is captured once so the assertion reads as a plain substring check.
    rendered = repr(retriever)
    assert "I have a pen" not in rendered