diff --git a/libs/community/langchain_community/document_loaders/sharepoint.py b/libs/community/langchain_community/document_loaders/sharepoint.py index 41ccf0147e6..e589a58447c 100644 --- a/libs/community/langchain_community/document_loaders/sharepoint.py +++ b/libs/community/langchain_community/document_loaders/sharepoint.py @@ -78,6 +78,7 @@ class SharePointLoader(O365BaseLoader, BaseLoader): auth_identities = self.authorized_identities(file_id) if self.load_extended_metadata is True: extended_metadata = self.get_extended_metadata(file_id) + extended_metadata.update({"source_full_url": target_folder.web_url}) for parsed_blob in blob_parser.lazy_parse(blob): if self.load_auth is True: parsed_blob.metadata["authorized_identities"] = auth_identities @@ -94,6 +95,7 @@ class SharePointLoader(O365BaseLoader, BaseLoader): auth_identities = self.authorized_identities(file_id) if self.load_extended_metadata is True: extended_metadata = self.get_extended_metadata(file_id) + extended_metadata.update({"source_full_url": target_folder.web_url}) for parsed_blob in blob_parser.lazy_parse(blob): if self.load_auth is True: parsed_blob.metadata["authorized_identities"] = auth_identities @@ -130,6 +132,9 @@ class SharePointLoader(O365BaseLoader, BaseLoader): blob_part.metadata["authorized_identities"] = auth_identities if self.load_extended_metadata is True: blob_part.metadata.update(extended_metadata) + blob_part.metadata.update( + {"source_full_url": target_folder.web_url} + ) yield blob_part def authorized_identities(self, file_id: str) -> List: