From c8a171a154ed8c953ea26dc161efbc788942bff9 Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Tue, 5 Mar 2024 18:35:50 +0100 Subject: [PATCH] community: Implement lazy_load() for GithubFileLoader (#18584) --- .../langchain_community/document_loaders/github.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/github.py b/libs/community/langchain_community/document_loaders/github.py index b896a615667..e2d4e0fb927 100644 --- a/libs/community/langchain_community/document_loaders/github.py +++ b/libs/community/langchain_community/document_loaders/github.py @@ -217,9 +217,7 @@ class GithubFileLoader(BaseGitHubLoader, ABC): return "" - def load(self) -> List[Document]: - documents = [] - + def lazy_load(self) -> Iterator[Document]: files = self.get_file_paths() for file in files: content = self.get_file_content_by_path(file["path"]) @@ -232,6 +230,4 @@ class GithubFileLoader(BaseGitHubLoader, ABC): "source": f"{self.github_api_url}/{self.repo}/{file['type']}/" f"{self.branch}/{file['path']}", } - documents.append(Document(page_content=content, metadata=metadata)) - - return documents + yield Document(page_content=content, metadata=metadata)