mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-19 11:08:55 +00:00
# Change Default GoogleDriveLoader Behavior to not Load Trashed Files (issue #5104) Fixes #5104 If you need the previous behavior of loading files that used to live in the folder but are now trashed, you can use the `load_trashed_files` parameter: ``` loader = GoogleDriveLoader( folder_id="1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5", recursive=False, load_trashed_files=True ) ``` Since not loading trashed files should be the expected behavior, should we 1. even provide the `load_trashed_files` parameter? 2. add documentation? It feels like most users will stick with the default behavior. ## Who can review? Community members can review the PR once tests pass. Tag maintainers/contributors who might be interested: DataLoaders - @eyurtsev Twitter: [@nicholasliu77](https://twitter.com/nicholasliu77)
This commit is contained in:
parent
eff31a3361
commit
f0ea093de8
@ -31,6 +31,7 @@ class GoogleDriveLoader(BaseLoader, BaseModel):
|
|||||||
file_ids: Optional[List[str]] = None
|
file_ids: Optional[List[str]] = None
|
||||||
recursive: bool = False
|
recursive: bool = False
|
||||||
file_types: Optional[Sequence[str]] = None
|
file_types: Optional[Sequence[str]] = None
|
||||||
|
load_trashed_files: bool = False
|
||||||
|
|
||||||
@root_validator
|
@root_validator
|
||||||
def validate_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
def validate_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
@ -215,8 +216,10 @@ class GoogleDriveLoader(BaseLoader, BaseModel):
|
|||||||
_files = files
|
_files = files
|
||||||
|
|
||||||
returns = []
|
returns = []
|
||||||
for file in _files:
|
for file in files:
|
||||||
if file["mimeType"] == "application/vnd.google-apps.document":
|
if file["trashed"] and not self.load_trashed_files:
|
||||||
|
continue
|
||||||
|
elif file["mimeType"] == "application/vnd.google-apps.document":
|
||||||
returns.append(self._load_document_from_id(file["id"])) # type: ignore
|
returns.append(self._load_document_from_id(file["id"])) # type: ignore
|
||||||
elif file["mimeType"] == "application/vnd.google-apps.spreadsheet":
|
elif file["mimeType"] == "application/vnd.google-apps.spreadsheet":
|
||||||
returns.extend(self._load_sheet_from_id(file["id"])) # type: ignore
|
returns.extend(self._load_sheet_from_id(file["id"])) # type: ignore
|
||||||
@ -224,7 +227,6 @@ class GoogleDriveLoader(BaseLoader, BaseModel):
|
|||||||
returns.extend(self._load_file_from_id(file["id"])) # type: ignore
|
returns.extend(self._load_file_from_id(file["id"])) # type: ignore
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return returns
|
return returns
|
||||||
|
|
||||||
def _fetch_files_recursive(
|
def _fetch_files_recursive(
|
||||||
@ -238,7 +240,7 @@ class GoogleDriveLoader(BaseLoader, BaseModel):
|
|||||||
pageSize=1000,
|
pageSize=1000,
|
||||||
includeItemsFromAllDrives=True,
|
includeItemsFromAllDrives=True,
|
||||||
supportsAllDrives=True,
|
supportsAllDrives=True,
|
||||||
fields="nextPageToken, files(id, name, mimeType, parents)",
|
fields="nextPageToken, files(id, name, mimeType, parents, trashed)",
|
||||||
)
|
)
|
||||||
.execute()
|
.execute()
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user