mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-11 22:04:37 +00:00
parent
9a6f7e213b
commit
5985454269
@ -1,5 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Callable, List, Optional
|
from typing import Callable, Iterator, Optional
|
||||||
|
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
|
|
||||||
@ -39,7 +39,7 @@ class GitLoader(BaseLoader):
|
|||||||
self.branch = branch
|
self.branch = branch
|
||||||
self.file_filter = file_filter
|
self.file_filter = file_filter
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def lazy_load(self) -> Iterator[Document]:
|
||||||
try:
|
try:
|
||||||
from git import Blob, Repo
|
from git import Blob, Repo
|
||||||
except ImportError as ex:
|
except ImportError as ex:
|
||||||
@ -68,8 +68,6 @@ class GitLoader(BaseLoader):
|
|||||||
repo = Repo(self.repo_path)
|
repo = Repo(self.repo_path)
|
||||||
repo.git.checkout(self.branch)
|
repo.git.checkout(self.branch)
|
||||||
|
|
||||||
docs: List[Document] = []
|
|
||||||
|
|
||||||
for item in repo.tree().traverse():
|
for item in repo.tree().traverse():
|
||||||
if not isinstance(item, Blob):
|
if not isinstance(item, Blob):
|
||||||
continue
|
continue
|
||||||
@ -102,9 +100,6 @@ class GitLoader(BaseLoader):
|
|||||||
"file_name": item.name,
|
"file_name": item.name,
|
||||||
"file_type": file_type,
|
"file_type": file_type,
|
||||||
}
|
}
|
||||||
doc = Document(page_content=text_content, metadata=metadata)
|
yield Document(page_content=text_content, metadata=metadata)
|
||||||
docs.append(doc)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error reading file {file_path}: {e}") # noqa: T201
|
print(f"Error reading file {file_path}: {e}") # noqa: T201
|
||||||
|
|
||||||
return docs
|
|
||||||
|
Loading…
Reference in New Issue
Block a user