diff --git a/libs/community/langchain_community/document_loaders/git.py b/libs/community/langchain_community/document_loaders/git.py index 8d97ecd3df4..b37f575eacb 100644 --- a/libs/community/langchain_community/document_loaders/git.py +++ b/libs/community/langchain_community/document_loaders/git.py @@ -1,5 +1,5 @@ import os -from typing import Callable, List, Optional +from typing import Callable, Iterator, Optional from langchain_core.documents import Document @@ -39,7 +39,7 @@ class GitLoader(BaseLoader): self.branch = branch self.file_filter = file_filter - def load(self) -> List[Document]: + def lazy_load(self) -> Iterator[Document]: try: from git import Blob, Repo except ImportError as ex: @@ -68,8 +68,6 @@ class GitLoader(BaseLoader): repo = Repo(self.repo_path) repo.git.checkout(self.branch) - docs: List[Document] = [] - for item in repo.tree().traverse(): if not isinstance(item, Blob): continue @@ -102,9 +100,6 @@ class GitLoader(BaseLoader): "file_name": item.name, "file_type": file_type, } - doc = Document(page_content=text_content, metadata=metadata) - docs.append(doc) + yield Document(page_content=text_content, metadata=metadata) except Exception as e: print(f"Error reading file {file_path}: {e}") # noqa: T201 - - return docs